torch_em.data.sampler

  1import numpy as np
  2from typing import List, Optional, Callable, Union
  3
  4
  5class MinForegroundSampler:
  6    """A sampler to reject samples with a low fraction of foreground pixels in the labels.
  7
  8    Args:
  9        min_fraction: The minimal fraction of foreground pixels for accepting a sample.
 10        background_id: The id of the background label.
 11        p_reject: The probability for rejecting a sample that does not meet the criterion.
 12    """
 13    def __init__(self, min_fraction: float, background_id: int = 0, p_reject: float = 1.0):
 14        self.min_fraction = min_fraction
 15        self.background_id = background_id
 16        self.p_reject = p_reject
 17
 18    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
 19        """Check the sample.
 20
 21        Args:
 22            x: The raw data.
 23            y: The label data.
 24
 25        Returns:
 26            Whether to accept this sample.
 27        """
 28        # We do this so that it's also possible to use the MinForegroundSampler for raw data,
 29        # in order to filter out areas that are not imaged, for example for large EM volumes.
 30        if y is None:
 31            y = x
 32
 33        size = float(y.size)
 34        if isinstance(self.background_id, int):
 35            foreground_fraction = np.sum(y != self.background_id) / size
 36        else:
 37            foreground_fraction = np.sum(np.logical_not(np.isin(y, self.background_id))) / size
 38
 39        if foreground_fraction > self.min_fraction:
 40            return True
 41        else:
 42            return np.random.rand() > self.p_reject
 43
 44
 45class MinSemanticLabelForegroundSampler:
 46    """A sampler to reject samples with a low fraction of foreground pixels in the semantic labels.
 47
 48    Args:
 49        semantic_ids: The ids for semantic classes to take into account.
 50        min_fraction: The minimal fraction of foreground pixels for accepting a sample.
 51        min_fraction_per_id: Whether the minimal fraction is applied on a per label basis.
 52        p_reject: The probability for rejecting a sample that does not meet the criterion.
 53    """
 54    def __init__(
 55        self, semantic_ids: List[int], min_fraction: float, min_fraction_per_id: bool = False, p_reject: float = 1.0
 56    ):
 57        self.semantic_ids = semantic_ids
 58        self.min_fraction = min_fraction
 59        self.p_reject = p_reject
 60        self.min_fraction_per_id = min_fraction_per_id
 61
 62    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
 63        """Check the sample.
 64
 65        Args:
 66            x: The raw data.
 67            y: The label data.
 68
 69        Returns:
 70            Whether to accept this sample.
 71        """
 72        size = float(y.size)
 73
 74        if self.min_fraction_per_id:
 75            foreground_fraction = [np.sum(np.isin(y, idx)) / size for idx in self.semantic_ids]
 76        else:
 77            foreground_fraction = [np.sum(np.isin(y, self.semantic_ids))]
 78
 79        if all(fraction > self.min_fraction for fraction in foreground_fraction):
 80            return True
 81        else:
 82            return np.random.rand() > self.p_reject
 83
 84
 85class MinIntensitySampler:
 86    """A sampler to reject samples with low intensity in the raw data.
 87
 88    Args:
 89        min_intensity: The minimal intensity for accepting a sample.
 90        function: The function for computing the intensity of the raw data.
 91            Can either be a function or a name of a valid numpy atttribute.
 92            In the latter case the corresponding numpy function is used.
 93        p_reject: The probability for rejecting a sample that does not meet the criterion.
 94    """
 95    def __init__(self, min_intensity: int, function: Union[str, Callable] = "median", p_reject: float = 1.0):
 96        self.min_intensity = min_intensity
 97        self.function = getattr(np, function) if isinstance(function, str) else function
 98        assert callable(self.function)
 99        self.p_reject = p_reject
100
101    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
102        """Check the sample.
103
104        Args:
105            x: The raw data.
106            y: The label data.
107
108        Returns:
109            Whether to accept this sample.
110        """
111        intensity = self.function(x)
112        if intensity > self.min_intensity:
113            return True
114        else:
115            return np.random.rand() > self.p_reject
116
117
class MinInstanceSampler:
    """Sampler that accepts a sample only if its label data contains enough instances.

    Args:
        min_num_instances: The minimum number of instances for accepting a sample.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
        min_size: The minimal size for instances to be taken into account.
        exclude_ids: The ids to exclude (i.e. not consider) for sampling a valid input.
    """
    def __init__(
        self,
        min_num_instances: int = 2,
        p_reject: float = 1.0,
        min_size: Optional[int] = None,
        exclude_ids: Optional[List[int]] = None,
    ):
        self.min_num_instances = min_num_instances
        self.p_reject = p_reject
        self.min_size = min_size
        self.exclude_ids = exclude_ids

        assert self.exclude_ids is None or isinstance(self.exclude_ids, list)

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Decide whether the sample is accepted.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        labels, counts = np.unique(y, return_counts=True)

        # Drop the ids that cover fewer than `min_size` pixels.
        if self.min_size is not None:
            labels = labels[counts >= self.min_size]

        # Count the remaining ids, leaving out the explicitly excluded ones.
        if self.exclude_ids is None:
            n_instances = len(labels)
        else:
            n_instances = sum(1 for label in labels if label not in self.exclude_ids)

        if n_instances >= self.min_num_instances:
            return True
        # A sample below the threshold is still accepted with probability 1 - p_reject.
        return np.random.rand() > self.p_reject
165
166
class MinTwoInstanceSampler:
    """Sampler that accepts a sample only if its label data contains at least two different ids.

    This is ca. 10x faster than `MinInstanceSampler(min_num_instances=2)`, which relies on
    the slow `np.unique`.

    Args:
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, p_reject: float = 1.0):
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Decide whether the sample is accepted.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        # Any value that differs from the first element proves a second id is present.
        reference = y.flat[0]
        if np.any(y != reference):
            return True
        # A sample with a single id is still accepted with probability 1 - p_reject.
        return np.random.rand() > self.p_reject
193
194
# Sometimes it is necessary to ignore boundaries to the background
# in RF training. Then it can happen that, even with 2 instances in the
# sampled image, there is no boundary left in the image after the
# label_transform, and the RF only learns one class (error further downstream).
# Therefore, this sampler is needed. Unfortunately, the NoToBackgroundBoundaryTransform
# is then calculated multiple times.
class MinNoToBackgroundBoundarySampler:
    """Sampler that accepts a sample only if enough non-background pixels remain after `trafo`.

    The transformation is applied to the labels, pixels with the transformation's
    `mask_label` are set to its `bg_label`, and the fraction of pixels that differ
    from `bg_label` is compared with `min_fraction`.

    Args:
        trafo: The transformation. It is called on the label data and has to provide
            the attributes `bg_label` and `mask_label`.
        min_fraction: The fraction that has to be exceeded for accepting a sample.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, trafo, min_fraction: float = 0.01, p_reject: float = 1.0):
        self.trafo = trafo
        self.bg_label = trafo.bg_label
        self.mask_label = trafo.mask_label
        self.min_fraction = min_fraction
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Decide whether the sample is accepted.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        boundaries = self.trafo(y)
        # Masked pixels are treated like background; the trafo output is updated in place.
        is_masked = boundaries == self.mask_label
        boundaries[is_masked] = self.bg_label

        n_foreground = np.sum(boundaries != self.bg_label)
        if n_foreground / float(boundaries.size) > self.min_fraction:
            return True
        # A sample below the threshold is still accepted with probability 1 - p_reject.
        return np.random.rand() > self.p_reject
class MinForegroundSampler:
 6class MinForegroundSampler:
 7    """A sampler to reject samples with a low fraction of foreground pixels in the labels.
 8
 9    Args:
10        min_fraction: The minimal fraction of foreground pixels for accepting a sample.
11        background_id: The id of the background label.
12        p_reject: The probability for rejecting a sample that does not meet the criterion.
13    """
14    def __init__(self, min_fraction: float, background_id: int = 0, p_reject: float = 1.0):
15        self.min_fraction = min_fraction
16        self.background_id = background_id
17        self.p_reject = p_reject
18
19    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
20        """Check the sample.
21
22        Args:
23            x: The raw data.
24            y: The label data.
25
26        Returns:
27            Whether to accept this sample.
28        """
29        # We do this so that it's also possible to use the MinForegroundSampler for raw data,
30        # in order to filter out areas that are not imaged, for example for large EM volumes.
31        if y is None:
32            y = x
33
34        size = float(y.size)
35        if isinstance(self.background_id, int):
36            foreground_fraction = np.sum(y != self.background_id) / size
37        else:
38            foreground_fraction = np.sum(np.logical_not(np.isin(y, self.background_id))) / size
39
40        if foreground_fraction > self.min_fraction:
41            return True
42        else:
43            return np.random.rand() > self.p_reject

A sampler to reject samples with a low fraction of foreground pixels in the labels.

Arguments:
  • min_fraction: The minimal fraction of foreground pixels for accepting a sample.
  • background_id: The id of the background label.
  • p_reject: The probability for rejecting a sample that does not meet the criterion.
MinForegroundSampler(min_fraction: float, background_id: int = 0, p_reject: float = 1.0)
14    def __init__(self, min_fraction: float, background_id: int = 0, p_reject: float = 1.0):
15        self.min_fraction = min_fraction
16        self.background_id = background_id
17        self.p_reject = p_reject
min_fraction
background_id
p_reject
class MinSemanticLabelForegroundSampler:
class MinSemanticLabelForegroundSampler:
    """A sampler to reject samples with a low fraction of foreground pixels in the semantic labels.

    Args:
        semantic_ids: The ids for semantic classes to take into account.
        min_fraction: The minimal fraction of foreground pixels for accepting a sample.
        min_fraction_per_id: Whether the minimal fraction is applied on a per label basis.
            If False, the pixels of all `semantic_ids` are pooled into a single fraction.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(
        self, semantic_ids: List[int], min_fraction: float, min_fraction_per_id: bool = False, p_reject: float = 1.0
    ):
        self.semantic_ids = semantic_ids
        self.min_fraction = min_fraction
        self.p_reject = p_reject
        self.min_fraction_per_id = min_fraction_per_id

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check the sample.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        size = float(y.size)

        if self.min_fraction_per_id:
            # One fraction per semantic id; every one of them has to exceed the threshold.
            foreground_fraction = [np.sum(np.isin(y, idx)) / size for idx in self.semantic_ids]
        else:
            # Normalize the pooled pixel count by the sample size, so that it is a fraction
            # and comparable to `min_fraction` (a raw count would pass with a single pixel).
            foreground_fraction = [np.sum(np.isin(y, self.semantic_ids)) / size]

        if all(fraction > self.min_fraction for fraction in foreground_fraction):
            return True
        else:
            # A sample below the threshold is still accepted with probability 1 - p_reject.
            return np.random.rand() > self.p_reject

A sampler to reject samples with a low fraction of foreground pixels in the semantic labels.

Arguments:
  • semantic_ids: The ids for semantic classes to take into account.
  • min_fraction: The minimal fraction of foreground pixels for accepting a sample.
  • min_fraction_per_id: Whether the minimal fraction is applied on a per label basis.
  • p_reject: The probability for rejecting a sample that does not meet the criterion.
MinSemanticLabelForegroundSampler( semantic_ids: List[int], min_fraction: float, min_fraction_per_id: bool = False, p_reject: float = 1.0)
55    def __init__(
56        self, semantic_ids: List[int], min_fraction: float, min_fraction_per_id: bool = False, p_reject: float = 1.0
57    ):
58        self.semantic_ids = semantic_ids
59        self.min_fraction = min_fraction
60        self.p_reject = p_reject
61        self.min_fraction_per_id = min_fraction_per_id
semantic_ids
min_fraction
p_reject
min_fraction_per_id
class MinIntensitySampler:
 86class MinIntensitySampler:
 87    """A sampler to reject samples with low intensity in the raw data.
 88
 89    Args:
 90        min_intensity: The minimal intensity for accepting a sample.
 91        function: The function for computing the intensity of the raw data.
 92            Can either be a function or a name of a valid numpy atttribute.
 93            In the latter case the corresponding numpy function is used.
 94        p_reject: The probability for rejecting a sample that does not meet the criterion.
 95    """
 96    def __init__(self, min_intensity: int, function: Union[str, Callable] = "median", p_reject: float = 1.0):
 97        self.min_intensity = min_intensity
 98        self.function = getattr(np, function) if isinstance(function, str) else function
 99        assert callable(self.function)
100        self.p_reject = p_reject
101
102    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
103        """Check the sample.
104
105        Args:
106            x: The raw data.
107            y: The label data.
108
109        Returns:
110            Whether to accept this sample.
111        """
112        intensity = self.function(x)
113        if intensity > self.min_intensity:
114            return True
115        else:
116            return np.random.rand() > self.p_reject

A sampler to reject samples with low intensity in the raw data.

Arguments:
  • min_intensity: The minimal intensity for accepting a sample.
  • function: The function for computing the intensity of the raw data. Can either be a function or a name of a valid numpy attribute. In the latter case the corresponding numpy function is used.
  • p_reject: The probability for rejecting a sample that does not meet the criterion.
MinIntensitySampler( min_intensity: int, function: Union[str, Callable] = 'median', p_reject: float = 1.0)
 96    def __init__(self, min_intensity: int, function: Union[str, Callable] = "median", p_reject: float = 1.0):
 97        self.min_intensity = min_intensity
 98        self.function = getattr(np, function) if isinstance(function, str) else function
 99        assert callable(self.function)
100        self.p_reject = p_reject
min_intensity
function
p_reject
class MinInstanceSampler:
class MinInstanceSampler:
    """Sampler that accepts a sample only if its label data contains enough instances.

    Args:
        min_num_instances: The minimum number of instances for accepting a sample.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
        min_size: The minimal size for instances to be taken into account.
        exclude_ids: The ids to exclude (i.e. not consider) for sampling a valid input.
    """
    def __init__(
        self,
        min_num_instances: int = 2,
        p_reject: float = 1.0,
        min_size: Optional[int] = None,
        exclude_ids: Optional[List[int]] = None,
    ):
        self.min_num_instances = min_num_instances
        self.p_reject = p_reject
        self.min_size = min_size
        self.exclude_ids = exclude_ids

        assert self.exclude_ids is None or isinstance(self.exclude_ids, list)

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Decide whether the sample is accepted.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        labels, counts = np.unique(y, return_counts=True)

        # Drop the ids that cover fewer than `min_size` pixels.
        if self.min_size is not None:
            labels = labels[counts >= self.min_size]

        # Count the remaining ids, leaving out the explicitly excluded ones.
        if self.exclude_ids is None:
            n_instances = len(labels)
        else:
            n_instances = sum(1 for label in labels if label not in self.exclude_ids)

        if n_instances >= self.min_num_instances:
            return True
        # A sample below the threshold is still accepted with probability 1 - p_reject.
        return np.random.rand() > self.p_reject

A sampler to reject samples with too few instances in the label data.

Arguments:
  • min_num_instances: The minimum number of instances for accepting a sample.
  • p_reject: The probability for rejecting a sample that does not meet the criterion.
  • min_size: The minimal size for instances to be taken into account.
  • exclude_ids: The ids to exclude (i.e. not consider) for sampling a valid input.
MinInstanceSampler( min_num_instances: int = 2, p_reject: float = 1.0, min_size: Optional[int] = None, exclude_ids: Optional[List[int]] = None)
128    def __init__(
129        self,
130        min_num_instances: int = 2,
131        p_reject: float = 1.0,
132        min_size: Optional[int] = None,
133        exclude_ids: Optional[List[int]] = None,
134    ):
135        self.min_num_instances = min_num_instances
136        self.p_reject = p_reject
137        self.min_size = min_size
138        self.exclude_ids = exclude_ids
139
140        if self.exclude_ids is not None:
141            assert isinstance(self.exclude_ids, list)
min_num_instances
p_reject
min_size
exclude_ids
class MinTwoInstanceSampler:
class MinTwoInstanceSampler:
    """Sampler that accepts a sample only if its label data contains at least two different ids.

    This is ca. 10x faster than `MinInstanceSampler(min_num_instances=2)`, which relies on
    the slow `np.unique`.

    Args:
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, p_reject: float = 1.0):
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Decide whether the sample is accepted.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        # Any value that differs from the first element proves a second id is present.
        reference = y.flat[0]
        if np.any(y != reference):
            return True
        # A sample with a single id is still accepted with probability 1 - p_reject.
        return np.random.rand() > self.p_reject

A sampler to reject samples with less than two instances in the label data.

This is ca. 10x faster than MinInstanceSampler(min_num_instances=2), which uses np.unique and is therefore slow.

Arguments:
  • p_reject: The probability for rejecting a sample that does not meet the criterion.
MinTwoInstanceSampler(p_reject: float = 1.0)
176    def __init__(self, p_reject: float = 1.0):
177        self.p_reject = p_reject
p_reject
class MinNoToBackgroundBoundarySampler:
class MinNoToBackgroundBoundarySampler:
    """Sampler that accepts a sample only if enough non-background pixels remain after `trafo`.

    The transformation is applied to the labels, pixels with the transformation's
    `mask_label` are set to its `bg_label`, and the fraction of pixels that differ
    from `bg_label` is compared with `min_fraction`.

    Args:
        trafo: The transformation. It is called on the label data and has to provide
            the attributes `bg_label` and `mask_label`.
        min_fraction: The fraction that has to be exceeded for accepting a sample.
        p_reject: The probability for rejecting a sample that does not meet the criterion.
    """
    def __init__(self, trafo, min_fraction: float = 0.01, p_reject: float = 1.0):
        self.trafo = trafo
        self.bg_label = trafo.bg_label
        self.mask_label = trafo.mask_label
        self.min_fraction = min_fraction
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Decide whether the sample is accepted.

        Args:
            x: The raw data.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        boundaries = self.trafo(y)
        # Masked pixels are treated like background; the trafo output is updated in place.
        is_masked = boundaries == self.mask_label
        boundaries[is_masked] = self.bg_label

        n_foreground = np.sum(boundaries != self.bg_label)
        if n_foreground / float(boundaries.size) > self.min_fraction:
            return True
        # A sample below the threshold is still accepted with probability 1 - p_reject.
        return np.random.rand() > self.p_reject

A sampler to reject samples for training with pseudo labels.

Arguments:
  • trafo: The transformation.
  • min_fraction: The minimal fraction for accepting a sample.
  • p_reject: The probability for rejecting a sample that does not meet the criterion.
MinNoToBackgroundBoundarySampler(trafo, min_fraction: float = 0.01, p_reject: float = 1.0)
210    def __init__(self, trafo, min_fraction: float = 0.01, p_reject: float = 1.0):
211        self.trafo = trafo
212        self.bg_label = trafo.bg_label
213        self.mask_label = trafo.mask_label
214        self.min_fraction = min_fraction
215        self.p_reject = p_reject
trafo
bg_label
mask_label
min_fraction
p_reject