torch_em.data.sampler

  1import numpy as np
  2from typing import List, Optional, Callable, Union
  3
  4
  5class MinForegroundSampler:
  6    """A sampler to reject samples with a low fraction of foreground pixels in the labels.
  7
  8    Args:
  9        min_fraction: The minimal fraction of foreground pixels for accepting a sample.
 10        background_id: The id of the background label.
 11        p_reject: The probability for rejecting a sample that does not meet the criterion.
 12    """
 13    def __init__(self, min_fraction: float, background_id: int = 0, p_reject: float = 1.0):
 14        self.min_fraction = min_fraction
 15        self.background_id = background_id
 16        self.p_reject = p_reject
 17
 18    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
 19        """Check the sample.
 20
 21        Args:
 22            x: The raw data.
 23            y: The label data.
 24
 25        Returns:
 26            Whether to accept this sample.
 27        """
 28        # We do this so that it's also possible to use the MinForegroundSampler for raw data,
 29        # in order to filter out areas that are not imaged, for example for large EM volumes.
 30        if y is None:
 31            y = x
 32
 33        size = float(y.size)
 34        if isinstance(self.background_id, int):
 35            foreground_fraction = np.sum(y != self.background_id) / size
 36        else:
 37            foreground_fraction = np.sum(np.logical_not(np.isin(y, self.background_id))) / size
 38
 39        if foreground_fraction > self.min_fraction:
 40            return True
 41        else:
 42            return np.random.rand() > self.p_reject
 43
 44
 45class MinSemanticLabelForegroundSampler:
 46    """A sampler to reject samples with a low fraction of foreground pixels in the semantic labels.
 47
 48    Args:
 49        sematic_ids: The ids for semantic classes to take into account.
 50        min_fraction: The minimal fraction of foreground pixels for accepting a sample.
 51        min_fraction_per_id: Whether the minimal fraction is applied on a per label basis.
 52        p_reject: The probability for rejecting a sample that does not meet the criterion.
 53    """
 54    def __init__(
 55        self, semantic_ids: List[int], min_fraction: float, min_fraction_per_id: bool = False, p_reject: float = 1.0
 56    ):
 57        self.semantic_ids = semantic_ids
 58        self.min_fraction = min_fraction
 59        self.p_reject = p_reject
 60        self.min_fraction_per_id = min_fraction_per_id
 61
 62    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
 63        """Check the sample.
 64
 65        Args:
 66            x: The raw data.
 67            y: The label data.
 68
 69        Returns:
 70            Whether to accept this sample.
 71        """
 72        size = float(y.size)
 73
 74        if self.min_fraction_per_id:
 75            foreground_fraction = [np.sum(np.isin(y, idx)) / size for idx in self.semantic_ids]
 76        else:
 77            foreground_fraction = [np.sum(np.isin(y, self.semantic_ids))]
 78
 79        if all(foreground_fraction) > self.min_fraction:
 80            return True
 81        else:
 82            return np.random.rand() > self.p_reject
 83
 84
 85class MinIntensitySampler:
 86    """A sampler to reject samples with low intensity in the raw data.
 87
 88    Args:
 89        min_intensity: The minimal intensity for accepting a sample.
 90        function: The function for computing the intensity of the raw data.
 91            Can either be a function or a name of a valid numpy atttribute.
 92            In the latter case the corresponding numpy function is used.
 93        p_reject: The probability for rejecting a sample that does not meet the criterion.
 94    """
 95    def __init__(self, min_intensity: int, function: Union[str, Callable] = "median", p_reject: float = 1.0):
 96        self.min_intensity = min_intensity
 97        self.function = getattr(np, function) if isinstance(function, str) else function
 98        assert callable(self.function)
 99        self.p_reject = p_reject
100
101    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
102        """Check the sample.
103
104        Args:
105            x: The raw data.
106            y: The label data.
107
108        Returns:
109            Whether to accept this sample.
110        """
111        intensity = self.function(x)
112        if intensity > self.min_intensity:
113            return True
114        else:
115            return np.random.rand() > self.p_reject
116
117
class MinInstanceSampler:
    """Sampler that accepts a sample only if its labels contain enough distinct ids.

    Every distinct value in the label data counts as one instance. If `min_size` is given,
    only ids that cover at least that many pixels are counted.
    Samples that fail the check are still accepted with probability `1 - p_reject`.

    Args:
        min_num_instances: The number of instances a sample needs at least in order to be accepted.
        p_reject: The probability of rejecting a sample that fails the instance check.
        min_size: The minimal number of pixels for an instance to be counted.
            By default, instances of any size are counted.
    """
    def __init__(self, min_num_instances: int = 2, p_reject: float = 1.0, min_size: Optional[int] = None):
        self.min_size = min_size
        self.p_reject = p_reject
        self.min_num_instances = min_num_instances

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Decide whether the sample is accepted.

        Args:
            x: The raw data. It is not used by this sampler.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        label_ids, pixel_counts = np.unique(y, return_counts=True)
        if self.min_size is None:
            n_instances = len(label_ids)
        else:
            # Drop the ids that cover fewer than min_size pixels before counting.
            n_instances = len(label_ids[pixel_counts >= self.min_size])

        if n_instances >= self.min_num_instances:
            return True
        # The criterion is not met: reject with probability p_reject.
        return np.random.rand() > self.p_reject
150
151
class MinTwoInstanceSampler:
    """Sampler that accepts a sample only if its labels contain at least two distinct values.

    This is ca. 10x faster than `MinInstanceSampler(min_num_instances=2)`, because it avoids
    the slow call to np.unique.
    Samples that fail the check are still accepted with probability `1 - p_reject`.

    Args:
        p_reject: The probability of rejecting a sample that contains only a single value.
    """
    def __init__(self, p_reject: float = 1.0):
        self.p_reject = p_reject

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Decide whether the sample is accepted.

        Args:
            x: The raw data. It is not used by this sampler.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        # Any pixel that differs from the first one proves that there is a second value.
        first_value = y.flat[0]
        differs_from_first = y != first_value
        if differs_from_first.any():
            return True
        # The criterion is not met: reject with probability p_reject.
        return np.random.rand() > self.p_reject
178
179
# Sometimes it is necessary to ignore boundaries to the background
# in RF training. Then, it can happen that even with 2 instances in the
# image while sampling there will be no boundary in the image after the
# label_transform and the RF only learns one class (Error further downstream).
# Therefore, this sampler is needed. Unfortunately, the NoToBackgroundBoundaryTransform
# is then calculated multiple times.
class MinNoToBackgroundBoundarySampler:
    """Sampler that accepts a sample only if the transformed labels contain enough foreground.

    The label data is passed through `trafo`. In the result, pixels equal to the mask label
    of the transformation are set to its background label, and the fraction of pixels that
    differ from the background label is then compared with `min_fraction`.
    Samples that fail the check are still accepted with probability `1 - p_reject`.

    Args:
        trafo: The transformation applied to the label data.
            It must be callable and provide the attributes `bg_label` and `mask_label`.
        min_fraction: The foreground fraction a sample has to exceed in order to be accepted.
        p_reject: The probability of rejecting a sample that fails the check.
    """
    def __init__(self, trafo, min_fraction: float = 0.01, p_reject: float = 1.0):
        self.trafo = trafo
        # The label values are read once from the transformation.
        self.mask_label = trafo.mask_label
        self.bg_label = trafo.bg_label
        self.p_reject = p_reject
        self.min_fraction = min_fraction

    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Decide whether the sample is accepted.

        Args:
            x: The raw data. It is not used by this sampler.
            y: The label data.

        Returns:
            Whether to accept this sample.
        """
        transformed = self.trafo(y)
        # Masked pixels are treated like background; this overwrites them in the transform output.
        is_masked = transformed == self.mask_label
        transformed[is_masked] = self.bg_label

        n_foreground = np.sum(transformed != self.bg_label)
        if n_foreground / float(transformed.size) > self.min_fraction:
            return True
        # The criterion is not met: reject with probability p_reject.
        return np.random.rand() > self.p_reject
class MinForegroundSampler:
 6class MinForegroundSampler:
 7    """A sampler to reject samples with a low fraction of foreground pixels in the labels.
 8
 9    Args:
10        min_fraction: The minimal fraction of foreground pixels for accepting a sample.
11        background_id: The id of the background label.
12        p_reject: The probability for rejecting a sample that does not meet the criterion.
13    """
14    def __init__(self, min_fraction: float, background_id: int = 0, p_reject: float = 1.0):
15        self.min_fraction = min_fraction
16        self.background_id = background_id
17        self.p_reject = p_reject
18
19    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
20        """Check the sample.
21
22        Args:
23            x: The raw data.
24            y: The label data.
25
26        Returns:
27            Whether to accept this sample.
28        """
29        # We do this so that it's also possible to use the MinForegroundSampler for raw data,
30        # in order to filter out areas that are not imaged, for example for large EM volumes.
31        if y is None:
32            y = x
33
34        size = float(y.size)
35        if isinstance(self.background_id, int):
36            foreground_fraction = np.sum(y != self.background_id) / size
37        else:
38            foreground_fraction = np.sum(np.logical_not(np.isin(y, self.background_id))) / size
39
40        if foreground_fraction > self.min_fraction:
41            return True
42        else:
43            return np.random.rand() > self.p_reject

A sampler to reject samples with a low fraction of foreground pixels in the labels.

Arguments:
  • min_fraction: The minimal fraction of foreground pixels for accepting a sample.
  • background_id: The id of the background label.
  • p_reject: The probability for rejecting a sample that does not meet the criterion.
MinForegroundSampler(min_fraction: float, background_id: int = 0, p_reject: float = 1.0)
14    def __init__(self, min_fraction: float, background_id: int = 0, p_reject: float = 1.0):
15        self.min_fraction = min_fraction
16        self.background_id = background_id
17        self.p_reject = p_reject
min_fraction
background_id
p_reject
class MinSemanticLabelForegroundSampler:
46class MinSemanticLabelForegroundSampler:
47    """A sampler to reject samples with a low fraction of foreground pixels in the semantic labels.
48
49    Args:
50        sematic_ids: The ids for semantic classes to take into account.
51        min_fraction: The minimal fraction of foreground pixels for accepting a sample.
52        min_fraction_per_id: Whether the minimal fraction is applied on a per label basis.
53        p_reject: The probability for rejecting a sample that does not meet the criterion.
54    """
55    def __init__(
56        self, semantic_ids: List[int], min_fraction: float, min_fraction_per_id: bool = False, p_reject: float = 1.0
57    ):
58        self.semantic_ids = semantic_ids
59        self.min_fraction = min_fraction
60        self.p_reject = p_reject
61        self.min_fraction_per_id = min_fraction_per_id
62
63    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
64        """Check the sample.
65
66        Args:
67            x: The raw data.
68            y: The label data.
69
70        Returns:
71            Whether to accept this sample.
72        """
73        size = float(y.size)
74
75        if self.min_fraction_per_id:
76            foreground_fraction = [np.sum(np.isin(y, idx)) / size for idx in self.semantic_ids]
77        else:
78            foreground_fraction = [np.sum(np.isin(y, self.semantic_ids))]
79
80        if all(foreground_fraction) > self.min_fraction:
81            return True
82        else:
83            return np.random.rand() > self.p_reject

A sampler to reject samples with a low fraction of foreground pixels in the semantic labels.

Arguments:
  • semantic_ids: The ids for semantic classes to take into account.
  • min_fraction: The minimal fraction of foreground pixels for accepting a sample.
  • min_fraction_per_id: Whether the minimal fraction is applied on a per label basis.
  • p_reject: The probability for rejecting a sample that does not meet the criterion.
MinSemanticLabelForegroundSampler( semantic_ids: List[int], min_fraction: float, min_fraction_per_id: bool = False, p_reject: float = 1.0)
55    def __init__(
56        self, semantic_ids: List[int], min_fraction: float, min_fraction_per_id: bool = False, p_reject: float = 1.0
57    ):
58        self.semantic_ids = semantic_ids
59        self.min_fraction = min_fraction
60        self.p_reject = p_reject
61        self.min_fraction_per_id = min_fraction_per_id
semantic_ids
min_fraction
p_reject
min_fraction_per_id
class MinIntensitySampler:
 86class MinIntensitySampler:
 87    """A sampler to reject samples with low intensity in the raw data.
 88
 89    Args:
 90        min_intensity: The minimal intensity for accepting a sample.
 91        function: The function for computing the intensity of the raw data.
 92            Can either be a function or a name of a valid numpy atttribute.
 93            In the latter case the corresponding numpy function is used.
 94        p_reject: The probability for rejecting a sample that does not meet the criterion.
 95    """
 96    def __init__(self, min_intensity: int, function: Union[str, Callable] = "median", p_reject: float = 1.0):
 97        self.min_intensity = min_intensity
 98        self.function = getattr(np, function) if isinstance(function, str) else function
 99        assert callable(self.function)
100        self.p_reject = p_reject
101
102    def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> bool:
103        """Check the sample.
104
105        Args:
106            x: The raw data.
107            y: The label data.
108
109        Returns:
110            Whether to accept this sample.
111        """
112        intensity = self.function(x)
113        if intensity > self.min_intensity:
114            return True
115        else:
116            return np.random.rand() > self.p_reject

A sampler to reject samples with low intensity in the raw data.

Arguments:
  • min_intensity: The minimal intensity for accepting a sample.
  • function: The function for computing the intensity of the raw data. Can either be a function or a name of a valid numpy attribute. In the latter case the corresponding numpy function is used.
  • p_reject: The probability for rejecting a sample that does not meet the criterion.
MinIntensitySampler( min_intensity: int, function: Union[str, Callable] = 'median', p_reject: float = 1.0)
 96    def __init__(self, min_intensity: int, function: Union[str, Callable] = "median", p_reject: float = 1.0):
 97        self.min_intensity = min_intensity
 98        self.function = getattr(np, function) if isinstance(function, str) else function
 99        assert callable(self.function)
100        self.p_reject = p_reject
min_intensity
function
p_reject
class MinInstanceSampler:
119class MinInstanceSampler:
120    """A sampler to reject samples with too few instances in the label data.
121
122    Args:
123        min_num_instances: The minimum number of instances for accepting a sample.
124        p_reject: The probability for rejecting a sample that does not meet the criterion.
125        min_size: The minimal size for instances to be taken into account.
126    """
127    def __init__(self, min_num_instances: int = 2, p_reject: float = 1.0, min_size: Optional[int] = None):
128        self.min_num_instances = min_num_instances
129        self.p_reject = p_reject
130        self.min_size = min_size
131
132    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
133        """Check the sample.
134
135        Args:
136            x: The raw data.
137            y: The label data.
138
139        Returns:
140            Whether to accept this sample.
141        """
142        uniques, sizes = np.unique(y, return_counts=True)
143        if self.min_size is not None:
144            filter_ids = uniques[sizes >= self.min_size]
145            uniques = filter_ids
146
147        if len(uniques) >= self.min_num_instances:
148            return True
149        else:
150            return np.random.rand() > self.p_reject

A sampler to reject samples with too few instances in the label data.

Arguments:
  • min_num_instances: The minimum number of instances for accepting a sample.
  • p_reject: The probability for rejecting a sample that does not meet the criterion.
  • min_size: The minimal size for instances to be taken into account.
MinInstanceSampler( min_num_instances: int = 2, p_reject: float = 1.0, min_size: Optional[int] = None)
127    def __init__(self, min_num_instances: int = 2, p_reject: float = 1.0, min_size: Optional[int] = None):
128        self.min_num_instances = min_num_instances
129        self.p_reject = p_reject
130        self.min_size = min_size
min_num_instances
p_reject
min_size
class MinTwoInstanceSampler:
153class MinTwoInstanceSampler:
154    """A sampler to reject samples with less than two instances in the label data.
155
156    This is ca. 10x faster than `MinInstanceSampler(min_num_instances=2)` that which uses np.unique, which is slow.
157
158    Args:
159        p_reject: The probability for rejecting a sample that does not meet the criterion.
160    """
161    def __init__(self, p_reject: float = 1.0):
162        self.p_reject = p_reject
163
164    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
165        """Check the sample.
166
167        Args:
168            x: The raw data.
169            y: The label data.
170
171        Returns:
172            Whether to accept this sample.
173        """
174        sample_value = y.flat[0]
175        if (y != sample_value).any():
176            return True
177        else:
178            return np.random.rand() > self.p_reject

A sampler to reject samples with less than two instances in the label data.

This is ca. 10x faster than MinInstanceSampler(min_num_instances=2), which relies on the slow np.unique.

Arguments:
  • p_reject: The probability for rejecting a sample that does not meet the criterion.
MinTwoInstanceSampler(p_reject: float = 1.0)
161    def __init__(self, p_reject: float = 1.0):
162        self.p_reject = p_reject
p_reject
class MinNoToBackgroundBoundarySampler:
187class MinNoToBackgroundBoundarySampler:
188    """A sampler to reject samples for training with pseudo labels.
189
190    Args:
191        trafo: The transformation.
192        min_fraction: The minimal fraction for accepting a sample.
193        p_reject: The probability for rejecting a sample that does not meet the criterion.
194    """
195    def __init__(self, trafo, min_fraction: float = 0.01, p_reject: float = 1.0):
196        self.trafo = trafo
197        self.bg_label = trafo.bg_label
198        self.mask_label = trafo.mask_label
199        self.min_fraction = min_fraction
200        self.p_reject = p_reject
201
202    def __call__(self, x: np.ndarray, y: np.ndarray) -> bool:
203        """Check the sample.
204
205        Args:
206            x: The raw data.
207            y: The label data.
208
209        Returns:
210            Whether to accept this sample.
211        """
212        y_boundaries = self.trafo(y)
213        y_boundaries[y_boundaries == self.mask_label] = self.bg_label
214        size = float(y_boundaries.size)
215        foreground_fraction = np.sum(y_boundaries != self.bg_label) / size
216        if foreground_fraction > self.min_fraction:
217            return True
218        else:
219            return np.random.rand() > self.p_reject

A sampler to reject samples for training with pseudo labels.

Arguments:
  • trafo: The transformation.
  • min_fraction: The minimal fraction for accepting a sample.
  • p_reject: The probability for rejecting a sample that does not meet the criterion.
MinNoToBackgroundBoundarySampler(trafo, min_fraction: float = 0.01, p_reject: float = 1.0)
195    def __init__(self, trafo, min_fraction: float = 0.01, p_reject: float = 1.0):
196        self.trafo = trafo
197        self.bg_label = trafo.bg_label
198        self.mask_label = trafo.mask_label
199        self.min_fraction = min_fraction
200        self.p_reject = p_reject
trafo
bg_label
mask_label
min_fraction
p_reject