torch_em.data.sampler

  1import numpy as np
  2from typing import List
  3
  4
  5class MinForegroundSampler:
  6    def __init__(
  7        self,
  8        min_fraction: float,
  9        background_id: int = 0,
 10        p_reject: float = 1.0
 11    ):
 12        self.min_fraction = min_fraction
 13        self.background_id = background_id
 14        self.p_reject = p_reject
 15
 16    def __call__(self, x, y=None):
 17
 18        # we do this so it's also possible to use the MinForegroundSampler
 19        # for raw data, in order to filter out not imaged areas, for example in
 20        # large EM volumes.
 21        if y is None:
 22            y = x
 23
 24        size = float(y.size)
 25        if isinstance(self.background_id, int):
 26            foreground_fraction = np.sum(y != self.background_id) / size
 27        else:
 28            foreground_fraction = np.sum(
 29                np.logical_not(np.isin(y, self.background_id))
 30            ) / size
 31        if foreground_fraction > self.min_fraction:
 32            return True
 33        else:
 34            return np.random.rand() > self.p_reject
 35
 36
 37class MinSemanticLabelForegroundSampler:
 38    def __init__(
 39        self,
 40        semantic_ids: List[int],
 41        min_fraction: float,
 42        min_fraction_per_id: bool = False,
 43        p_reject: float = 1.0
 44    ):
 45        self.semantic_ids = semantic_ids
 46        self.min_fraction = min_fraction
 47        self.p_reject = p_reject
 48        self.min_fraction_per_id = min_fraction_per_id
 49
 50    def __call__(self, x, y):
 51        size = float(y.size)
 52
 53        if self.min_fraction_per_id:
 54            foreground_fraction = [np.sum(np.isin(y, idx)) / size for idx in self.semantic_ids]
 55        else:
 56            foreground_fraction = [np.sum(np.isin(y, self.semantic_ids))]
 57
 58        if all(foreground_fraction) > self.min_fraction:
 59            return True
 60        else:
 61            return np.random.rand() > self.p_reject
 62
 63
 64class MinIntensitySampler:
 65    def __init__(
 66        self,
 67        min_intensity: int,
 68        function="median",
 69        p_reject: float = 1.0
 70    ):
 71        self.min_intensity = min_intensity
 72        self.function = getattr(np, function) if isinstance(function, str) else function
 73        assert callable(self.function)
 74        self.p_reject = p_reject
 75
 76    def __call__(self, x, y=None):
 77        intensity = self.function(x)
 78        if intensity > self.min_intensity:
 79            return True
 80        else:
 81            return np.random.rand() > self.p_reject
 82
 83
 84class MinInstanceSampler:
 85    def __init__(
 86        self,
 87        min_num_instances: int = 2,
 88        p_reject: float = 1.0
 89    ):
 90        self.min_num_instances = min_num_instances
 91        self.p_reject = p_reject
 92
 93    def __call__(self, x, y):
 94        uniques = np.unique(y)
 95        if len(uniques) >= self.min_num_instances:
 96            return True
 97        else:
 98            return np.random.rand() > self.p_reject
 99
100
class MinTwoInstanceSampler:
    """Accept a sample if its labels contain at least two distinct values.

    Args:
        p_reject: A sample with a single label value is still accepted when
            a uniform draw from [0, 1) is larger than this value.
    """
    # For min_num_instances=2 this is roughly 10x faster than
    # MinInstanceSampler, because np.unique is slow.

    def __init__(
        self,
        p_reject: float = 1.0
    ):
        self.p_reject = p_reject

    def __call__(self, x, y):
        """Return a truthy value if the sample should be kept; ``x`` is ignored."""
        # Any element differing from the first one proves a second value exists.
        first_value = y.flat[0]
        differs_from_first = y != first_value
        if differs_from_first.any():
            return True
        return np.random.rand() > self.p_reject
116
117
class MinNoToBackgroundBoundarySampler:
    """Accept a sample if enough boundary remains after applying ``trafo``.

    Args:
        trafo: Callable applied to the labels; it must expose ``bg_label``
            and ``mask_label`` attributes.
        min_fraction: Fraction of non-background elements in the transformed
            labels that must be exceeded for unconditional acceptance.
        p_reject: A sample below the threshold is still accepted when a
            uniform draw from [0, 1) is larger than this value.
    """
    # Sometimes it is necessary to ignore boundaries to the background
    # in RF training. Then it can happen that, even with 2 instances in the
    # image while sampling, there is no boundary left in the image after the
    # label_transform and the RF only learns one class (error further downstream).
    # Therefore this sampler is needed. Unfortunately, the
    # NoToBackgroundBoundaryTransform is then calculated multiple times.

    def __init__(
        self,
        trafo,
        min_fraction: float = 0.01,
        p_reject: float = 1.0
    ):
        self.trafo = trafo
        self.bg_label = trafo.bg_label
        self.mask_label = trafo.mask_label
        self.min_fraction = min_fraction
        self.p_reject = p_reject

    def __call__(self, x, y):
        """Return a truthy value if the sample should be kept; ``x`` is ignored."""
        boundaries = self.trafo(y)
        # Masked elements are counted as background (in-place on the trafo output).
        boundaries[boundaries == self.mask_label] = self.bg_label
        n_foreground = np.sum(boundaries != self.bg_label)
        if n_foreground / float(boundaries.size) > self.min_fraction:
            return True
        return np.random.rand() > self.p_reject
class MinForegroundSampler:
    """Accept a sample if its fraction of foreground elements is large enough.

    An element counts as foreground when it differs from ``background_id``
    (or is not contained in it, when ``background_id`` is not a single int).

    Args:
        min_fraction: Foreground fraction that must be exceeded for the
            sample to be accepted unconditionally.
        background_id: Value (or collection of values) treated as background.
        p_reject: A sample below the threshold is still accepted when a
            uniform draw from [0, 1) is larger than this value.
    """

    def __init__(
        self,
        min_fraction: float,
        background_id: int = 0,
        p_reject: float = 1.0
    ):
        self.p_reject = p_reject
        self.background_id = background_id
        self.min_fraction = min_fraction

    def __call__(self, x, y=None):
        """Return a truthy value if the sample should be kept."""
        # Falling back to x makes the sampler usable on raw data alone,
        # e.g. to filter out areas that were not imaged in large EM volumes.
        labels = x if y is None else y

        if isinstance(self.background_id, int):
            is_foreground = labels != self.background_id
        else:
            is_foreground = np.logical_not(np.isin(labels, self.background_id))

        fraction = np.sum(is_foreground) / float(labels.size)
        if fraction > self.min_fraction:
            return True
        # Below the threshold: keep the sample only with probability 1 - p_reject.
        return np.random.rand() > self.p_reject
MinForegroundSampler(min_fraction: float, background_id: int = 0, p_reject: float = 1.0)
 7    def __init__(
 8        self,
 9        min_fraction: float,
10        background_id: int = 0,
11        p_reject: float = 1.0
12    ):
13        self.min_fraction = min_fraction
14        self.background_id = background_id
15        self.p_reject = p_reject
min_fraction
background_id
p_reject
class MinSemanticLabelForegroundSampler:
    """Accept a sample if the given semantic labels cover enough of it.

    Args:
        semantic_ids: Label values that count as foreground.
        min_fraction: Fraction of elements that must be exceeded for the
            sample to be accepted unconditionally.
        min_fraction_per_id: If True, every id in ``semantic_ids`` must
            exceed ``min_fraction`` on its own. Otherwise the combined
            fraction of all ids is compared against ``min_fraction``.
        p_reject: A sample below the threshold is still accepted when a
            uniform draw from [0, 1) is larger than this value.
    """

    def __init__(
        self,
        semantic_ids: List[int],
        min_fraction: float,
        min_fraction_per_id: bool = False,
        p_reject: float = 1.0
    ):
        self.semantic_ids = semantic_ids
        self.min_fraction = min_fraction
        self.p_reject = p_reject
        self.min_fraction_per_id = min_fraction_per_id

    def __call__(self, x, y):
        """Return a truthy value if the sample should be kept.

        Only the labels ``y`` are inspected; ``x`` is ignored.
        """
        size = float(y.size)

        if self.min_fraction_per_id:
            fractions = [np.sum(np.isin(y, idx)) / size for idx in self.semantic_ids]
        else:
            # The combined count must be normalized too, otherwise a raw
            # element count would be compared against a fraction.
            fractions = [np.sum(np.isin(y, self.semantic_ids)) / size]

        # Compare each fraction with the threshold. Comparing the bool
        # returned by all(fractions) would accept any non-zero fraction.
        if all(fraction > self.min_fraction for fraction in fractions):
            return True
        return np.random.rand() > self.p_reject
MinSemanticLabelForegroundSampler( semantic_ids: List[int], min_fraction: float, min_fraction_per_id: bool = False, p_reject: float = 1.0)
39    def __init__(
40        self,
41        semantic_ids: List[int],
42        min_fraction: float,
43        min_fraction_per_id: bool = False,
44        p_reject: float = 1.0
45    ):
46        self.semantic_ids = semantic_ids
47        self.min_fraction = min_fraction
48        self.p_reject = p_reject
49        self.min_fraction_per_id = min_fraction_per_id
semantic_ids
min_fraction
p_reject
min_fraction_per_id
class MinIntensitySampler:
    """Accept a sample if a statistic of the raw data exceeds a minimum.

    Args:
        min_intensity: Value the computed statistic has to exceed for the
            sample to be accepted unconditionally.
        function: Either the name of a numpy function (looked up with
            ``getattr(np, function)``) or a callable applied to the raw data.
        p_reject: A sample below the threshold is still accepted when a
            uniform draw from [0, 1) is larger than this value.
    """

    def __init__(
        self,
        min_intensity: int,
        function="median",
        p_reject: float = 1.0
    ):
        self.min_intensity = min_intensity
        # Strings are resolved to the numpy function of the same name.
        if isinstance(function, str):
            function = getattr(np, function)
        self.function = function
        assert callable(self.function)
        self.p_reject = p_reject

    def __call__(self, x, y=None):
        """Return a truthy value if the sample should be kept; ``y`` is ignored."""
        if self.function(x) > self.min_intensity:
            return True
        return np.random.rand() > self.p_reject
MinIntensitySampler(min_intensity: int, function='median', p_reject: float = 1.0)
66    def __init__(
67        self,
68        min_intensity: int,
69        function="median",
70        p_reject: float = 1.0
71    ):
72        self.min_intensity = min_intensity
73        self.function = getattr(np, function) if isinstance(function, str) else function
74        assert callable(self.function)
75        self.p_reject = p_reject
min_intensity
function
p_reject
class MinInstanceSampler:
    """Accept a sample if its labels contain enough distinct values.

    Args:
        min_num_instances: Minimum number of unique values in the labels
            for the sample to be accepted unconditionally.
        p_reject: A sample below the threshold is still accepted when a
            uniform draw from [0, 1) is larger than this value.
    """

    def __init__(
        self,
        min_num_instances: int = 2,
        p_reject: float = 1.0
    ):
        self.p_reject = p_reject
        self.min_num_instances = min_num_instances

    def __call__(self, x, y):
        """Return a truthy value if the sample should be kept; ``x`` is ignored."""
        n_unique = len(np.unique(y))
        if n_unique >= self.min_num_instances:
            return True
        return np.random.rand() > self.p_reject
MinInstanceSampler(min_num_instances: int = 2, p_reject: float = 1.0)
86    def __init__(
87        self,
88        min_num_instances: int = 2,
89        p_reject: float = 1.0
90    ):
91        self.min_num_instances = min_num_instances
92        self.p_reject = p_reject
min_num_instances
p_reject
class MinTwoInstanceSampler:
    """Accept a sample if its labels contain at least two distinct values.

    Args:
        p_reject: A sample with a single label value is still accepted when
            a uniform draw from [0, 1) is larger than this value.
    """
    # For min_num_instances=2 this is roughly 10x faster than
    # MinInstanceSampler, because np.unique is slow.

    def __init__(
        self,
        p_reject: float = 1.0
    ):
        self.p_reject = p_reject

    def __call__(self, x, y):
        """Return a truthy value if the sample should be kept; ``x`` is ignored."""
        # Any element differing from the first one proves a second value exists.
        first_value = y.flat[0]
        differs_from_first = y != first_value
        if differs_from_first.any():
            return True
        return np.random.rand() > self.p_reject
MinTwoInstanceSampler(p_reject: float = 1.0)
105    def __init__(
106        self,
107        p_reject: float = 1.0
108    ):
109        self.p_reject = p_reject
p_reject
class MinNoToBackgroundBoundarySampler:
    """Accept a sample if enough boundary remains after applying ``trafo``.

    Args:
        trafo: Callable applied to the labels; it must expose ``bg_label``
            and ``mask_label`` attributes.
        min_fraction: Fraction of non-background elements in the transformed
            labels that must be exceeded for unconditional acceptance.
        p_reject: A sample below the threshold is still accepted when a
            uniform draw from [0, 1) is larger than this value.
    """
    # Sometimes it is necessary to ignore boundaries to the background
    # in RF training. Then it can happen that, even with 2 instances in the
    # image while sampling, there is no boundary left in the image after the
    # label_transform and the RF only learns one class (error further downstream).
    # Therefore this sampler is needed. Unfortunately, the
    # NoToBackgroundBoundaryTransform is then calculated multiple times.

    def __init__(
        self,
        trafo,
        min_fraction: float = 0.01,
        p_reject: float = 1.0
    ):
        self.trafo = trafo
        self.bg_label = trafo.bg_label
        self.mask_label = trafo.mask_label
        self.min_fraction = min_fraction
        self.p_reject = p_reject

    def __call__(self, x, y):
        """Return a truthy value if the sample should be kept; ``x`` is ignored."""
        boundaries = self.trafo(y)
        # Masked elements are counted as background (in-place on the trafo output).
        boundaries[boundaries == self.mask_label] = self.bg_label
        n_foreground = np.sum(boundaries != self.bg_label)
        if n_foreground / float(boundaries.size) > self.min_fraction:
            return True
        return np.random.rand() > self.p_reject
MinNoToBackgroundBoundarySampler(trafo, min_fraction: float = 0.01, p_reject: float = 1.0)
126    def __init__(
127        self,
128        trafo,
129        min_fraction: float = 0.01,
130        p_reject: float = 1.0
131    ):
132        self.trafo = trafo
133        self.bg_label = trafo.bg_label
134        self.mask_label = trafo.mask_label
135        self.min_fraction = min_fraction
136        self.p_reject = p_reject
trafo
bg_label
mask_label
min_fraction
p_reject