torch_em.data.sampler
import numpy as np
from typing import List, Optional, Callable, Union


class MinForegroundSampler:
    """A sampler that accepts a sample only if enough of its pixels are foreground.

    Args:
        min_fraction: The foreground fraction that a sample has to exceed to be accepted.
        background_id: The id of the background label. A value that is not an `int`
            (e.g. a list of ids) is matched with `np.isin` instead of `!=`.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, min_fraction: float, background_id: int = 0, p_reject: float = 1.0):
        self.min_fraction = min_fraction
        self.background_id = background_id
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
        """Check the sample.

        Args:
            x: The raw data. Only inspected if `y` is not given.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        # Falling back to the raw data makes the sampler usable without labels,
        # for example to filter out areas of large EM volumes that are not imaged.
        labels = x if y is None else y

        if isinstance(self.background_id, int):
            is_foreground = labels != self.background_id
        else:
            is_foreground = np.isin(labels, self.background_id, invert=True)
        fraction = np.sum(is_foreground) / float(labels.size)

        if fraction > self.min_fraction:
            return True
        # Samples failing the criterion are still accepted with probability 1 - p_reject.
        return np.random.rand() > self.p_reject


class MinSemanticLabelForegroundSampler:
    """A sampler to reject samples with a low fraction of foreground pixels in the semantic labels.

    Args:
        semantic_ids: The ids for semantic classes to take into account.
        min_fraction: The minimal fraction of foreground pixels for accepting a sample.
        min_fraction_per_id: Whether the minimal fraction is applied on a per label basis.
            If False, the pixels of all `semantic_ids` are pooled into a single fraction.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(
        self, semantic_ids: List[int], min_fraction: float, min_fraction_per_id: bool = False, p_reject: float = 1.0
    ):
        self.semantic_ids = semantic_ids
        self.min_fraction = min_fraction
        self.p_reject = p_reject
        self.min_fraction_per_id = min_fraction_per_id

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data. It is not used by this sampler.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        size = float(y.size)

        if self.min_fraction_per_id:
            counts = [np.sum(np.isin(y, idx)) for idx in self.semantic_ids]
        else:
            counts = [np.sum(np.isin(y, self.semantic_ids))]

        # Normalize in both modes: the pooled mode previously compared the raw pixel
        # count with `min_fraction`, so a single matching pixel passed the check.
        fractions = [count / size for count in counts]

        if all(fraction > self.min_fraction for fraction in fractions):
            return True
        # Samples failing the criterion are still accepted with probability 1 - p_reject.
        return np.random.rand() > self.p_reject


class MinIntensitySampler:
    """A sampler to reject samples with low intensity in the raw data.

    Args:
        min_intensity: The intensity that a sample has to exceed to be accepted.
        function: The function for computing the intensity of the raw data.
            Can either be a function or the name of a valid numpy attribute.
            In the latter case the corresponding numpy function is used.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, min_intensity: int, function: Union[str, Callable] = "median", p_reject: float = 1.0):
        self.min_intensity = min_intensity
        if isinstance(function, str):
            # A name is resolved on the numpy namespace, e.g. "median" -> np.median.
            function = getattr(np, function)
        self.function = function
        assert callable(self.function)
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data. It is not used by this sampler.

        Returns:
            Whether to accept this sample.
        """
        if self.function(x) > self.min_intensity:
            return True
        # Samples failing the criterion are still accepted with probability 1 - p_reject.
        return np.random.rand() > self.p_reject


class MinInstanceSampler:
    """A sampler to reject samples with too few instances in the label data.

    Every distinct value in the labels counts as one instance, unless it is
    filtered out via `min_size` or `exclude_ids`.

    Args:
        min_num_instances: The minimum number of instances for accepting a sample.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
        min_size: The minimal size for instances to be taken into account.
        exclude_ids: The ids to exclude (i.e. not consider) for sampling a valid input.
    """
    def __init__(
        self,
        min_num_instances: int = 2,
        p_reject: float = 1.0,
        min_size: Optional[int] = None,
        exclude_ids: Optional[List[int]] = None,
    ):
        self.min_num_instances = min_num_instances
        self.p_reject = p_reject
        self.min_size = min_size
        self.exclude_ids = exclude_ids

        assert self.exclude_ids is None or isinstance(self.exclude_ids, list)

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data. It is not used by this sampler.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        ids, counts = np.unique(y, return_counts=True)

        if self.min_size is not None:
            # Keep only the ids that cover at least `min_size` pixels.
            ids = ids[counts >= self.min_size]

        if self.exclude_ids is not None:
            ids = [label_id for label_id in ids if label_id not in self.exclude_ids]

        if len(ids) < self.min_num_instances:
            # Samples failing the criterion are still accepted with probability 1 - p_reject.
            return np.random.rand() > self.p_reject
        return True


class MinTwoInstanceSampler:
    """A sampler to reject samples with less than two instances in the label data.

    This is ca. 10x faster than `MinInstanceSampler(min_num_instances=2)`,
    which relies on the comparatively slow np.unique.

    Args:
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, p_reject: float = 1.0):
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data. It is not used by this sampler.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        # Two distinct values exist as soon as any pixel differs from the first one.
        has_second_value = (y != y.flat[0]).any()
        if not has_second_value:
            # Samples failing the criterion are still accepted with probability 1 - p_reject.
            return np.random.rand() > self.p_reject
        return True


# Sometimes it is necessary to ignore boundaries to the background
# in RF training. Then, it can happen that even with 2 instances in the
# image while sampling there will be no boundary in the image after the
# label_transform and the RF only learns one class (Error further downstream).
# Therefore, this sampler is needed. Unfortunately, the NoToBackgroundBoundaryTransform
# is then calculated multiple times.
class MinNoToBackgroundBoundarySampler:
    """A sampler to reject samples with too few foreground pixels after a label transformation.

    The transformation is applied to the labels, pixels carrying its mask label are
    counted as background, and the remaining foreground fraction is checked.

    Args:
        trafo: The transformation. It is called on the label data and has to provide
            the attributes `bg_label` and `mask_label`.
        min_fraction: The minimal fraction for accepting a sample.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, trafo, min_fraction: float = 0.01, p_reject: float = 1.0):
        self.trafo = trafo
        self.bg_label = trafo.bg_label
        self.mask_label = trafo.mask_label
        self.min_fraction = min_fraction
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data. It is not used by this sampler.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        boundaries = self.trafo(y)
        # Masked pixels are relabeled as background, in place on the transformation's output.
        boundaries[boundaries == self.mask_label] = self.bg_label
        n_foreground = np.sum(boundaries != self.bg_label)

        if n_foreground / float(boundaries.size) > self.min_fraction:
            return True
        # Samples failing the criterion are still accepted with probability 1 - p_reject.
        return np.random.rand() > self.p_reject
class
MinForegroundSampler:
class MinForegroundSampler:
    """A sampler that accepts a sample only if enough of its pixels are foreground.

    Args:
        min_fraction: The foreground fraction that a sample has to exceed to be accepted.
        background_id: The id of the background label. A value that is not an `int`
            (e.g. a list of ids) is matched with `np.isin` instead of `!=`.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, min_fraction: float, background_id: int = 0, p_reject: float = 1.0):
        self.min_fraction = min_fraction
        self.background_id = background_id
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
        """Check the sample.

        Args:
            x: The raw data. Only inspected if `y` is not given.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        # Falling back to the raw data makes the sampler usable without labels,
        # for example to filter out areas of large EM volumes that are not imaged.
        labels = x if y is None else y

        if isinstance(self.background_id, int):
            is_foreground = labels != self.background_id
        else:
            is_foreground = np.isin(labels, self.background_id, invert=True)
        fraction = np.sum(is_foreground) / float(labels.size)

        if fraction > self.min_fraction:
            return True
        # Samples failing the criterion are still accepted with probability 1 - p_reject.
        return np.random.rand() > self.p_reject
A sampler to reject samples with a low fraction of foreground pixels in the labels.
Arguments:
- min_fraction: The minimal fraction of foreground pixels for accepting a sample.
- background_id: The id of the background label.
- p_reject: The probability for rejecting a sample that does not meet the criterion.
class
MinSemanticLabelForegroundSampler:
class MinSemanticLabelForegroundSampler:
    """A sampler to reject samples with a low fraction of foreground pixels in the semantic labels.

    Args:
        semantic_ids: The ids for semantic classes to take into account.
        min_fraction: The minimal fraction of foreground pixels for accepting a sample.
        min_fraction_per_id: Whether the minimal fraction is applied on a per label basis.
            If False, the pixels of all `semantic_ids` are pooled into a single fraction.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(
        self, semantic_ids: List[int], min_fraction: float, min_fraction_per_id: bool = False, p_reject: float = 1.0
    ):
        self.semantic_ids = semantic_ids
        self.min_fraction = min_fraction
        self.p_reject = p_reject
        self.min_fraction_per_id = min_fraction_per_id

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data. It is not used by this sampler.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        size = float(y.size)

        if self.min_fraction_per_id:
            counts = [np.sum(np.isin(y, idx)) for idx in self.semantic_ids]
        else:
            counts = [np.sum(np.isin(y, self.semantic_ids))]

        # Normalize in both modes: the pooled mode previously compared the raw pixel
        # count with `min_fraction`, so a single matching pixel passed the check.
        fractions = [count / size for count in counts]

        if all(fraction > self.min_fraction for fraction in fractions):
            return True
        # Samples failing the criterion are still accepted with probability 1 - p_reject.
        return np.random.rand() > self.p_reject
A sampler to reject samples with a low fraction of foreground pixels in the semantic labels.
Arguments:
- semantic_ids: The ids for semantic classes to take into account.
- min_fraction: The minimal fraction of foreground pixels for accepting a sample.
- min_fraction_per_id: Whether the minimal fraction is applied on a per label basis.
- p_reject: The probability for rejecting a sample that does not meet the criterion.
class
MinIntensitySampler:
class MinIntensitySampler:
    """A sampler to reject samples with low intensity in the raw data.

    Args:
        min_intensity: The intensity that a sample has to exceed to be accepted.
        function: The function for computing the intensity of the raw data.
            Can either be a function or the name of a valid numpy attribute.
            In the latter case the corresponding numpy function is used.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, min_intensity: int, function: Union[str, Callable] = "median", p_reject: float = 1.0):
        self.min_intensity = min_intensity
        if isinstance(function, str):
            # A name is resolved on the numpy namespace, e.g. "median" -> np.median.
            function = getattr(np, function)
        self.function = function
        assert callable(self.function)
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data. It is not used by this sampler.

        Returns:
            Whether to accept this sample.
        """
        if self.function(x) > self.min_intensity:
            return True
        # Samples failing the criterion are still accepted with probability 1 - p_reject.
        return np.random.rand() > self.p_reject
A sampler to reject samples with low intensity in the raw data.
Arguments:
- min_intensity: The minimal intensity for accepting a sample.
- function: The function for computing the intensity of the raw data. Can either be a function or the name of a valid numpy attribute. In the latter case the corresponding numpy function is used.
- p_reject: The probability for rejecting a sample that does not meet the criterion.
class
MinInstanceSampler:
class MinInstanceSampler:
    """A sampler to reject samples with too few instances in the label data.

    Every distinct value in the labels counts as one instance, unless it is
    filtered out via `min_size` or `exclude_ids`.

    Args:
        min_num_instances: The minimum number of instances for accepting a sample.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
        min_size: The minimal size for instances to be taken into account.
        exclude_ids: The ids to exclude (i.e. not consider) for sampling a valid input.
    """
    def __init__(
        self,
        min_num_instances: int = 2,
        p_reject: float = 1.0,
        min_size: Optional[int] = None,
        exclude_ids: Optional[List[int]] = None,
    ):
        self.min_num_instances = min_num_instances
        self.p_reject = p_reject
        self.min_size = min_size
        self.exclude_ids = exclude_ids

        assert self.exclude_ids is None or isinstance(self.exclude_ids, list)

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data. It is not used by this sampler.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        ids, counts = np.unique(y, return_counts=True)

        if self.min_size is not None:
            # Keep only the ids that cover at least `min_size` pixels.
            ids = ids[counts >= self.min_size]

        if self.exclude_ids is not None:
            ids = [label_id for label_id in ids if label_id not in self.exclude_ids]

        if len(ids) < self.min_num_instances:
            # Samples failing the criterion are still accepted with probability 1 - p_reject.
            return np.random.rand() > self.p_reject
        return True
A sampler to reject samples with too few instances in the label data.
Arguments:
- min_num_instances: The minimum number of instances for accepting a sample.
- p_reject: The probability for rejecting a sample that does not meet the criterion.
- min_size: The minimal size for instances to be taken into account.
- exclude_ids: The ids to exclude (i.e. not consider) for sampling a valid input.
MinInstanceSampler( min_num_instances: int = 2, p_reject: float = 1.0, min_size: Optional[int] = None, exclude_ids: Optional[List[int]] = None)
128 def __init__( 129 self, 130 min_num_instances: int = 2, 131 p_reject: float = 1.0, 132 min_size: Optional[int] = None, 133 exclude_ids: Optional[List[int]] = None, 134 ): 135 self.min_num_instances = min_num_instances 136 self.p_reject = p_reject 137 self.min_size = min_size 138 self.exclude_ids = exclude_ids 139 140 if self.exclude_ids is not None: 141 assert isinstance(self.exclude_ids, list)
class
MinTwoInstanceSampler:
class MinTwoInstanceSampler:
    """A sampler to reject samples with less than two instances in the label data.

    This is ca. 10x faster than `MinInstanceSampler(min_num_instances=2)`,
    which relies on the comparatively slow np.unique.

    Args:
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, p_reject: float = 1.0):
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data. It is not used by this sampler.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        # Two distinct values exist as soon as any pixel differs from the first one.
        has_second_value = (y != y.flat[0]).any()
        if not has_second_value:
            # Samples failing the criterion are still accepted with probability 1 - p_reject.
            return np.random.rand() > self.p_reject
        return True
A sampler to reject samples with less than two instances in the label data.
This is ca. 10x faster than MinInstanceSampler(min_num_instances=2),
which relies on the comparatively slow np.unique.
Arguments:
- p_reject: The probability for rejecting a sample that does not meet the criterion.
class
MinNoToBackgroundBoundarySampler:
class MinNoToBackgroundBoundarySampler:
    """A sampler to reject samples with too few foreground pixels after a label transformation.

    The transformation is applied to the labels, pixels carrying its mask label are
    counted as background, and the remaining foreground fraction is checked.

    Args:
        trafo: The transformation. It is called on the label data and has to provide
            the attributes `bg_label` and `mask_label`.
        min_fraction: The minimal fraction for accepting a sample.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, trafo, min_fraction: float = 0.01, p_reject: float = 1.0):
        self.trafo = trafo
        self.bg_label = trafo.bg_label
        self.mask_label = trafo.mask_label
        self.min_fraction = min_fraction
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data. It is not used by this sampler.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        boundaries = self.trafo(y)
        # Masked pixels are relabeled as background, in place on the transformation's output.
        boundaries[boundaries == self.mask_label] = self.bg_label
        n_foreground = np.sum(boundaries != self.bg_label)

        if n_foreground / float(boundaries.size) > self.min_fraction:
            return True
        # Samples failing the criterion are still accepted with probability 1 - p_reject.
        return np.random.rand() > self.p_reject
A sampler to reject samples for training with pseudo labels.
Arguments:
- trafo: The transformation.
- min_fraction: The minimal fraction for accepting a sample.
- p_reject: The probability for rejecting a sample that does not meet the criterion.