torch_em.data.sampler
"""Samplers that decide whether a candidate sample is accepted or rejected."""
from typing import Callable, List, Optional, Union

import numpy as np


class MinForegroundSampler:
    """A sampler to reject samples with a low fraction of foreground pixels in the labels.

    Args:
        min_fraction: The minimal fraction of foreground pixels for accepting a sample.
        background_id: The id of the background label. A value that is not an `int` (e.g. a list of ids)
            is matched with `np.isin`, so that all of the given ids count as background.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, min_fraction: float, background_id: int = 0, p_reject: float = 1.0):
        self.min_fraction = min_fraction
        self.background_id = background_id
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data. If it is not given, the raw data is checked instead.

        Returns:
            Whether to accept this sample.
        """
        # Falling back to the raw data makes it possible to use this sampler without labels,
        # for example to filter out areas that are not imaged in large EM volumes.
        labels = x if y is None else y

        if isinstance(self.background_id, int):
            is_foreground = labels != self.background_id
        else:
            is_foreground = np.logical_not(np.isin(labels, self.background_id))
        foreground_fraction = np.sum(is_foreground) / float(labels.size)

        if foreground_fraction > self.min_fraction:
            return True
        # The criterion is not met: the sample is only accepted if the random draw exceeds p_reject.
        return np.random.rand() > self.p_reject


class MinSemanticLabelForegroundSampler:
    """A sampler to reject samples with a low fraction of foreground pixels in the semantic labels.

    Args:
        semantic_ids: The ids for semantic classes to take into account.
        min_fraction: The minimal fraction of foreground pixels for accepting a sample.
        min_fraction_per_id: Whether the minimal fraction is applied on a per label basis.
            If False, the pixels of all semantic ids are counted together.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(
        self, semantic_ids: List[int], min_fraction: float, min_fraction_per_id: bool = False, p_reject: float = 1.0
    ):
        self.semantic_ids = semantic_ids
        self.min_fraction = min_fraction
        self.p_reject = p_reject
        self.min_fraction_per_id = min_fraction_per_id

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        size = float(y.size)

        if self.min_fraction_per_id:
            # One fraction per semantic id; every single one has to exceed the threshold.
            foreground_fractions = [np.sum(np.isin(y, idx)) / size for idx in self.semantic_ids]
        else:
            # A single fraction for the pixels of all semantic ids taken together.
            foreground_fractions = [np.sum(np.isin(y, self.semantic_ids)) / size]

        # Compare each fraction with the threshold. Comparing the result of `all(...)` itself
        # with the threshold would only test whether the fractions are non-zero.
        if all(fraction > self.min_fraction for fraction in foreground_fractions):
            return True
        return np.random.rand() > self.p_reject


class MinIntensitySampler:
    """A sampler to reject samples with low intensity in the raw data.

    Args:
        min_intensity: The minimal intensity for accepting a sample.
        function: The function for computing the intensity of the raw data.
            Can either be a function or a name of a valid numpy attribute.
            In the latter case the corresponding numpy function is used.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, min_intensity: int, function: Union[str, Callable] = "median", p_reject: float = 1.0):
        self.min_intensity = min_intensity
        # A string is resolved to the numpy attribute of that name.
        if isinstance(function, str):
            function = getattr(np, function)
        self.function = function
        assert callable(self.function)
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data. It is not used by this sampler.

        Returns:
            Whether to accept this sample.
        """
        if self.function(x) > self.min_intensity:
            return True
        return np.random.rand() > self.p_reject


class MinInstanceSampler:
    """A sampler to reject samples with too few instances in the label data.

    Every distinct value in the label data is counted as one instance.

    Args:
        min_num_instances: The minimum number of instances for accepting a sample.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
        min_size: The minimal size for instances to be taken into account.
    """
    def __init__(self, min_num_instances: int = 2, p_reject: float = 1.0, min_size: Optional[int] = None):
        self.min_num_instances = min_num_instances
        self.p_reject = p_reject
        self.min_size = min_size

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        label_ids, counts = np.unique(y, return_counts=True)
        if self.min_size is None:
            n_instances = len(label_ids)
        else:
            # Only the ids that cover at least min_size pixels are counted.
            n_instances = int(np.count_nonzero(counts >= self.min_size))

        if n_instances >= self.min_num_instances:
            return True
        return np.random.rand() > self.p_reject


class MinTwoInstanceSampler:
    """A sampler to reject samples with less than two instances in the label data.

    This is ca. 10x faster than `MinInstanceSampler(min_num_instances=2)`, which uses the slower np.unique.

    Args:
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, p_reject: float = 1.0):
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        # Two distinct values exist as soon as any element differs from the first one.
        reference_value = y.flat[0]
        differs_from_reference = y != reference_value
        if differs_from_reference.any():
            return True
        return np.random.rand() > self.p_reject


# Sometimes it is necessary to ignore boundaries to the background
# in RF training. Then, it can happen that even with 2 instances in the
# image while sampling there will be no boundary in the image after the
# label_transform and the RF only learns one class (Error further downstream).
# Therefore, this sampler is needed. Unfortunately, the NoToBackgroundBoundaryTransform
# is then calculated multiple times.
class MinNoToBackgroundBoundarySampler:
    """A sampler to reject samples for training with pseudo labels.

    The transformation is applied to the label data, masked pixels are set to background,
    and the fraction of the remaining non-background pixels is compared with `min_fraction`.

    Args:
        trafo: The transformation. It is called on the label data and has to provide
            the attributes `bg_label` and `mask_label`.
        min_fraction: The minimal fraction for accepting a sample.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, trafo, min_fraction: float = 0.01, p_reject: float = 1.0):
        self.trafo = trafo
        self.bg_label = trafo.bg_label
        self.mask_label = trafo.mask_label
        self.min_fraction = min_fraction
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        transformed = self.trafo(y)
        # Masked pixels are relabeled (in place) as background so that they do not count as foreground.
        is_masked = transformed == self.mask_label
        transformed[is_masked] = self.bg_label

        n_foreground = np.sum(transformed != self.bg_label)
        if n_foreground / float(transformed.size) > self.min_fraction:
            return True
        return np.random.rand() > self.p_reject
class
MinForegroundSampler:
class MinForegroundSampler:
    """A sampler to reject samples with a low fraction of foreground pixels in the labels.

    Args:
        min_fraction: The minimal fraction of foreground pixels for accepting a sample.
        background_id: The id of the background label. A value that is not an `int` (e.g. a list of ids)
            is matched with `np.isin`, so that all of the given ids count as background.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, min_fraction: float, background_id: int = 0, p_reject: float = 1.0):
        self.min_fraction = min_fraction
        self.background_id = background_id
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data. If it is not given, the raw data is checked instead.

        Returns:
            Whether to accept this sample.
        """
        # Falling back to the raw data makes it possible to use this sampler without labels,
        # for example to filter out areas that are not imaged in large EM volumes.
        labels = x if y is None else y

        if isinstance(self.background_id, int):
            is_foreground = labels != self.background_id
        else:
            is_foreground = np.logical_not(np.isin(labels, self.background_id))
        foreground_fraction = np.sum(is_foreground) / float(labels.size)

        if foreground_fraction > self.min_fraction:
            return True
        # The criterion is not met: the sample is only accepted if the random draw exceeds p_reject.
        return np.random.rand() > self.p_reject
A sampler to reject samples with a low fraction of foreground pixels in the labels.
Arguments:
- min_fraction: The minimal fraction of foreground pixels for accepting a sample.
- background_id: The id of the background label.
- p_reject: The probability for rejecting a sample that does not meet the criterion.
class
MinSemanticLabelForegroundSampler:
class MinSemanticLabelForegroundSampler:
    """A sampler to reject samples with a low fraction of foreground pixels in the semantic labels.

    Args:
        semantic_ids: The ids for semantic classes to take into account.
        min_fraction: The minimal fraction of foreground pixels for accepting a sample.
        min_fraction_per_id: Whether the minimal fraction is applied on a per label basis.
            If False, the pixels of all semantic ids are counted together.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(
        self, semantic_ids: List[int], min_fraction: float, min_fraction_per_id: bool = False, p_reject: float = 1.0
    ):
        self.semantic_ids = semantic_ids
        self.min_fraction = min_fraction
        self.p_reject = p_reject
        self.min_fraction_per_id = min_fraction_per_id

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        size = float(y.size)

        if self.min_fraction_per_id:
            # One fraction per semantic id; every single one has to exceed the threshold.
            foreground_fractions = [np.sum(np.isin(y, idx)) / size for idx in self.semantic_ids]
        else:
            # A single fraction for the pixels of all semantic ids taken together.
            foreground_fractions = [np.sum(np.isin(y, self.semantic_ids)) / size]

        # Compare each fraction with the threshold. Comparing the result of `all(...)` itself
        # with the threshold would only test whether the fractions are non-zero.
        if all(fraction > self.min_fraction for fraction in foreground_fractions):
            return True
        # The criterion is not met: the sample is only accepted if the random draw exceeds p_reject.
        return np.random.rand() > self.p_reject
A sampler to reject samples with a low fraction of foreground pixels in the semantic labels.
Arguments:
- semantic_ids: The ids for semantic classes to take into account.
- min_fraction: The minimal fraction of foreground pixels for accepting a sample.
- min_fraction_per_id: Whether the minimal fraction is applied on a per label basis.
- p_reject: The probability for rejecting a sample that does not meet the criterion.
class
MinIntensitySampler:
class MinIntensitySampler:
    """A sampler to reject samples with low intensity in the raw data.

    Args:
        min_intensity: The minimal intensity for accepting a sample.
        function: The function for computing the intensity of the raw data.
            Can either be a function or a name of a valid numpy attribute.
            In the latter case the corresponding numpy function is used.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, min_intensity: int, function: Union[str, Callable] = "median", p_reject: float = 1.0):
        self.min_intensity = min_intensity
        # A string is resolved to the numpy attribute of that name.
        if isinstance(function, str):
            function = getattr(np, function)
        self.function = function
        assert callable(self.function)
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data. It is not used by this sampler.

        Returns:
            Whether to accept this sample.
        """
        if self.function(x) > self.min_intensity:
            return True
        # The criterion is not met: the sample is only accepted if the random draw exceeds p_reject.
        return np.random.rand() > self.p_reject
A sampler to reject samples with low intensity in the raw data.
Arguments:
- min_intensity: The minimal intensity for accepting a sample.
- function: The function for computing the intensity of the raw data. Can either be a function or a name of a valid numpy attribute. In the latter case the corresponding numpy function is used.
- p_reject: The probability for rejecting a sample that does not meet the criterion.
class
MinInstanceSampler:
class MinInstanceSampler:
    """A sampler to reject samples with too few instances in the label data.

    Every distinct value in the label data is counted as one instance.

    Args:
        min_num_instances: The minimum number of instances for accepting a sample.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
        min_size: The minimal size for instances to be taken into account.
    """
    def __init__(self, min_num_instances: int = 2, p_reject: float = 1.0, min_size: Optional[int] = None):
        self.min_num_instances = min_num_instances
        self.p_reject = p_reject
        self.min_size = min_size

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        label_ids, counts = np.unique(y, return_counts=True)
        if self.min_size is None:
            n_instances = len(label_ids)
        else:
            # Only the ids that cover at least min_size pixels are counted.
            n_instances = int(np.count_nonzero(counts >= self.min_size))

        if n_instances >= self.min_num_instances:
            return True
        # The criterion is not met: the sample is only accepted if the random draw exceeds p_reject.
        return np.random.rand() > self.p_reject
A sampler to reject samples with too few instances in the label data.
Arguments:
- min_num_instances: The minimum number of instances for accepting a sample.
- p_reject: The probability for rejecting a sample that does not meet the criterion.
- min_size: The minimal size for instances to be taken into account.
class
MinTwoInstanceSampler:
class MinTwoInstanceSampler:
    """A sampler to reject samples with less than two instances in the label data.

    This is ca. 10x faster than `MinInstanceSampler(min_num_instances=2)`, which uses the slower np.unique.

    Args:
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, p_reject: float = 1.0):
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        # Two distinct values exist as soon as any element differs from the first one.
        reference_value = y.flat[0]
        differs_from_reference = y != reference_value
        if differs_from_reference.any():
            return True
        # The criterion is not met: the sample is only accepted if the random draw exceeds p_reject.
        return np.random.rand() > self.p_reject
A sampler to reject samples with less than two instances in the label data.
This is ca. 10x faster than MinInstanceSampler(min_num_instances=2),
which uses np.unique and is therefore slow.
Arguments:
- p_reject: The probability for rejecting a sample that does not meet the criterion.
class
MinNoToBackgroundBoundarySampler:
class MinNoToBackgroundBoundarySampler:
    """A sampler to reject samples for training with pseudo labels.

    The transformation is applied to the label data, masked pixels are set to background,
    and the fraction of the remaining non-background pixels is compared with `min_fraction`.

    Args:
        trafo: The transformation. It is called on the label data and has to provide
            the attributes `bg_label` and `mask_label`.
        min_fraction: The minimal fraction for accepting a sample.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, trafo, min_fraction: float = 0.01, p_reject: float = 1.0):
        self.trafo = trafo
        self.bg_label = trafo.bg_label
        self.mask_label = trafo.mask_label
        self.min_fraction = min_fraction
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        transformed = self.trafo(y)
        # Masked pixels are relabeled (in place) as background so that they do not count as foreground.
        is_masked = transformed == self.mask_label
        transformed[is_masked] = self.bg_label

        n_foreground = np.sum(transformed != self.bg_label)
        if n_foreground / float(transformed.size) > self.min_fraction:
            return True
        # The criterion is not met: the sample is only accepted if the random draw exceeds p_reject.
        return np.random.rand() > self.p_reject
A sampler to reject samples for training with pseudo labels.
Arguments:
- trafo: The transformation.
- min_fraction: The minimal fraction for accepting a sample.
- p_reject: The probability for rejecting a sample that does not meet the criterion.