torch_em.data.sampler
import numpy as np
from typing import List


class MinForegroundSampler:
    """Accept a sample if enough of its elements differ from the background.

    Args:
        min_fraction: Fraction of non-background elements that must be exceeded.
        background_id: Value treated as background. Non-``int`` values are
            handed to ``np.isin``, so a collection of ids also works.
        p_reject: Threshold for the random fallback; a sample that fails the
            criterion is still accepted when ``np.random.rand() > p_reject``.
    """

    def __init__(
        self,
        min_fraction: float,
        background_id: int = 0,
        p_reject: float = 1.0
    ):
        self.min_fraction = min_fraction
        self.background_id = background_id
        self.p_reject = p_reject

    def __call__(self, x, y=None):
        """Return whether the sample is accepted.

        Args:
            x: Array that is inspected when ``y`` is not given.
            y: Array to inspect. Falls back to ``x`` so the sampler can also be
                used on raw data, e.g. to filter out areas that were not
                imaged in large EM volumes.
        """
        labels = x if y is None else y
        n_total = float(labels.size)

        if isinstance(self.background_id, int):
            is_foreground = labels != self.background_id
        else:
            is_foreground = np.logical_not(np.isin(labels, self.background_id))

        if np.sum(is_foreground) / n_total > self.min_fraction:
            return True
        return np.random.rand() > self.p_reject


class MinSemanticLabelForegroundSampler:
    """Accept a sample if the given semantic ids cover enough of the labels.

    Args:
        semantic_ids: Label values that count as foreground.
        min_fraction: Fraction of elements that must be exceeded.
        min_fraction_per_id: If True, every id in ``semantic_ids`` has to
            exceed ``min_fraction`` on its own; otherwise the ids are pooled.
        p_reject: Threshold for the random fallback; a sample that fails the
            criterion is still accepted when ``np.random.rand() > p_reject``.
    """

    def __init__(
        self,
        semantic_ids: List[int],
        min_fraction: float,
        min_fraction_per_id: bool = False,
        p_reject: float = 1.0
    ):
        self.semantic_ids = semantic_ids
        self.min_fraction = min_fraction
        self.p_reject = p_reject
        self.min_fraction_per_id = min_fraction_per_id

    def __call__(self, x, y):
        """Return whether the sample is accepted; ``x`` is not used."""
        size = float(y.size)

        if self.min_fraction_per_id:
            fractions = [np.sum(np.isin(y, idx)) / size for idx in self.semantic_ids]
        else:
            # Normalise by the size so the value is a fraction like min_fraction,
            # not a raw element count.
            fractions = [np.sum(np.isin(y, self.semantic_ids)) / size]

        # Compare each fraction with the threshold; `all(fractions) > t` would
        # compare a single bool with the threshold instead.
        if all(fraction > self.min_fraction for fraction in fractions):
            return True
        return np.random.rand() > self.p_reject


class MinIntensitySampler:
    """Accept a sample if a statistic of the input exceeds a minimum.

    Args:
        min_intensity: Value the statistic has to exceed.
        function: Callable applied to the input, or the name of a numpy
            attribute (looked up with ``getattr(np, function)``).
        p_reject: Threshold for the random fallback; a sample that fails the
            criterion is still accepted when ``np.random.rand() > p_reject``.
    """

    def __init__(
        self,
        min_intensity: int,
        function="median",
        p_reject: float = 1.0
    ):
        self.min_intensity = min_intensity
        self.function = getattr(np, function) if isinstance(function, str) else function
        assert callable(self.function)
        self.p_reject = p_reject

    def __call__(self, x, y=None):
        """Return whether the sample is accepted; ``y`` is not used."""
        if self.function(x) > self.min_intensity:
            return True
        return np.random.rand() > self.p_reject


class MinInstanceSampler:
    """Accept a sample if the labels hold enough distinct values.

    Args:
        min_num_instances: Minimum number of distinct values in ``y``. Every
            value returned by ``np.unique`` is counted.
        p_reject: Threshold for the random fallback; a sample that fails the
            criterion is still accepted when ``np.random.rand() > p_reject``.
    """

    def __init__(
        self,
        min_num_instances: int = 2,
        p_reject: float = 1.0
    ):
        self.min_num_instances = min_num_instances
        self.p_reject = p_reject

    def __call__(self, x, y):
        """Return whether the sample is accepted; ``x`` is not used."""
        if len(np.unique(y)) >= self.min_num_instances:
            return True
        return np.random.rand() > self.p_reject


class MinTwoInstanceSampler:
    """Accept a sample if the labels hold at least two distinct values.

    For the case of min_num_instances=2 this is roughly 10x faster
    than using MinInstanceSampler since np.unique is slow.

    Args:
        p_reject: Threshold for the random fallback; a sample that fails the
            criterion is still accepted when ``np.random.rand() > p_reject``.
    """

    def __init__(
        self,
        p_reject: float = 1.0
    ):
        self.p_reject = p_reject

    def __call__(self, x, y):
        """Return whether the sample is accepted; ``x`` is not used."""
        # Any element that differs from the first one proves a second value.
        first_value = y.flat[0]
        if (y != first_value).any():
            return True
        return np.random.rand() > self.p_reject


class MinNoToBackgroundBoundarySampler:
    """Accept a sample if enough boundary remains after the label transform.

    Sometimes it is necessary to ignore boundaries to the background
    in RF training. Then, it can happen that even with 2 instances in the
    image while sampling there will be no boundary in the image after the
    label_transform and the RF only learns one class (error further downstream).
    Therefore, this sampler is needed. Unfortunately, the
    NoToBackgroundBoundaryTransform is then calculated multiple times.

    Args:
        trafo: Callable applied to the labels; it must expose ``bg_label`` and
            ``mask_label`` attributes.
        min_fraction: Fraction of non-background elements that must be exceeded.
        p_reject: Threshold for the random fallback; a sample that fails the
            criterion is still accepted when ``np.random.rand() > p_reject``.
    """

    def __init__(
        self,
        trafo,
        min_fraction: float = 0.01,
        p_reject: float = 1.0
    ):
        self.trafo = trafo
        self.bg_label = trafo.bg_label
        self.mask_label = trafo.mask_label
        self.min_fraction = min_fraction
        self.p_reject = p_reject

    def __call__(self, x, y):
        """Return whether the sample is accepted; ``x`` is not used."""
        boundaries = self.trafo(y)
        # Masked elements are counted as background (modifies the transform output).
        boundaries[boundaries == self.mask_label] = self.bg_label
        n_total = float(boundaries.size)
        if np.sum(boundaries != self.bg_label) / n_total > self.min_fraction:
            return True
        return np.random.rand() > self.p_reject
class
MinForegroundSampler:
class MinForegroundSampler:
    """Accept a sample if enough of its elements differ from the background.

    Args:
        min_fraction: Fraction of non-background elements that must be exceeded.
        background_id: Value treated as background. Non-``int`` values are
            handed to ``np.isin``, so a collection of ids also works.
        p_reject: Threshold for the random fallback; a sample that fails the
            criterion is still accepted when ``np.random.rand() > p_reject``.
    """

    def __init__(
        self,
        min_fraction: float,
        background_id: int = 0,
        p_reject: float = 1.0
    ):
        self.min_fraction = min_fraction
        self.background_id = background_id
        self.p_reject = p_reject

    def __call__(self, x, y=None):
        """Return whether the sample is accepted.

        Args:
            x: Array that is inspected when ``y`` is not given.
            y: Array to inspect. Falls back to ``x`` so the sampler can also be
                used on raw data, e.g. to filter out areas that were not
                imaged in large EM volumes.
        """
        labels = x if y is None else y
        n_total = float(labels.size)

        if isinstance(self.background_id, int):
            is_foreground = labels != self.background_id
        else:
            is_foreground = np.logical_not(np.isin(labels, self.background_id))

        if np.sum(is_foreground) / n_total > self.min_fraction:
            return True
        return np.random.rand() > self.p_reject
class
MinSemanticLabelForegroundSampler:
class MinSemanticLabelForegroundSampler:
    """Accept a sample if the given semantic ids cover enough of the labels.

    Args:
        semantic_ids: Label values that count as foreground.
        min_fraction: Fraction of elements that must be exceeded.
        min_fraction_per_id: If True, every id in ``semantic_ids`` has to
            exceed ``min_fraction`` on its own; otherwise the ids are pooled.
        p_reject: Threshold for the random fallback; a sample that fails the
            criterion is still accepted when ``np.random.rand() > p_reject``.
    """

    def __init__(
        self,
        semantic_ids: List[int],
        min_fraction: float,
        min_fraction_per_id: bool = False,
        p_reject: float = 1.0
    ):
        self.semantic_ids = semantic_ids
        self.min_fraction = min_fraction
        self.p_reject = p_reject
        self.min_fraction_per_id = min_fraction_per_id

    def __call__(self, x, y):
        """Return whether the sample is accepted.

        Args:
            x: Not used by this sampler.
            y: Label array that is checked for the semantic ids.
        """
        size = float(y.size)

        if self.min_fraction_per_id:
            fractions = [np.sum(np.isin(y, idx)) / size for idx in self.semantic_ids]
        else:
            # Normalise by the size so the value is a fraction like min_fraction,
            # not a raw element count.
            fractions = [np.sum(np.isin(y, self.semantic_ids)) / size]

        # Compare each fraction with the threshold; `all(fractions) > t` would
        # compare a single bool with the threshold instead.
        if all(fraction > self.min_fraction for fraction in fractions):
            return True
        return np.random.rand() > self.p_reject
class
MinIntensitySampler:
class MinIntensitySampler:
    """Accept a sample if a statistic of the input exceeds a minimum.

    Args:
        min_intensity: Value the statistic has to exceed.
        function: Callable applied to the input, or the name of a numpy
            attribute (looked up with ``getattr(np, function)``).
        p_reject: Threshold for the random fallback; a sample that fails the
            criterion is still accepted when ``np.random.rand() > p_reject``.
    """

    def __init__(
        self,
        min_intensity: int,
        function="median",
        p_reject: float = 1.0
    ):
        self.min_intensity = min_intensity
        # Resolve names such as "median" or "mean" to the numpy function.
        if isinstance(function, str):
            function = getattr(np, function)
        self.function = function
        assert callable(self.function)
        self.p_reject = p_reject

    def __call__(self, x, y=None):
        """Return whether the sample is accepted.

        Args:
            x: Input handed to the configured function.
            y: Not used by this sampler.
        """
        if self.function(x) > self.min_intensity:
            return True
        return np.random.rand() > self.p_reject
class
MinInstanceSampler:
class MinInstanceSampler:
    """Accept a sample if the labels hold enough distinct values.

    Args:
        min_num_instances: Minimum number of distinct values in ``y``. Every
            value returned by ``np.unique`` is counted.
        p_reject: Threshold for the random fallback; a sample that fails the
            criterion is still accepted when ``np.random.rand() > p_reject``.
    """

    def __init__(
        self,
        min_num_instances: int = 2,
        p_reject: float = 1.0
    ):
        self.min_num_instances = min_num_instances
        self.p_reject = p_reject

    def __call__(self, x, y):
        """Return whether the sample is accepted.

        Args:
            x: Not used by this sampler.
            y: Label array whose distinct values are counted.
        """
        n_distinct = len(np.unique(y))
        if n_distinct >= self.min_num_instances:
            return True
        return np.random.rand() > self.p_reject
class
MinTwoInstanceSampler:
class MinTwoInstanceSampler:
    """Accept a sample if the labels hold at least two distinct values.

    For the case of min_num_instances=2 this is roughly 10x faster
    than using MinInstanceSampler since np.unique is slow.

    Args:
        p_reject: Threshold for the random fallback; a sample that fails the
            criterion is still accepted when ``np.random.rand() > p_reject``.
    """

    def __init__(
        self,
        p_reject: float = 1.0
    ):
        self.p_reject = p_reject

    def __call__(self, x, y):
        """Return whether the sample is accepted.

        Args:
            x: Not used by this sampler.
            y: Label array that is checked for a second value.
        """
        # Any element that differs from the first one proves a second value.
        first_value = y.flat[0]
        differs = y != first_value
        if differs.any():
            return True
        return np.random.rand() > self.p_reject
class
MinNoToBackgroundBoundarySampler:
class MinNoToBackgroundBoundarySampler:
    """Accept a sample if enough boundary remains after the label transform.

    Sometimes it is necessary to ignore boundaries to the background
    in RF training. Then, it can happen that even with 2 instances in the
    image while sampling there will be no boundary in the image after the
    label_transform and the RF only learns one class (error further downstream).
    Therefore, this sampler is needed. Unfortunately, the
    NoToBackgroundBoundaryTransform is then calculated multiple times.

    Args:
        trafo: Callable applied to the labels; it must expose ``bg_label`` and
            ``mask_label`` attributes.
        min_fraction: Fraction of non-background elements that must be exceeded.
        p_reject: Threshold for the random fallback; a sample that fails the
            criterion is still accepted when ``np.random.rand() > p_reject``.
    """

    def __init__(
        self,
        trafo,
        min_fraction: float = 0.01,
        p_reject: float = 1.0
    ):
        self.trafo = trafo
        self.bg_label = trafo.bg_label
        self.mask_label = trafo.mask_label
        self.min_fraction = min_fraction
        self.p_reject = p_reject

    def __call__(self, x, y):
        """Return whether the sample is accepted.

        Args:
            x: Not used by this sampler.
            y: Label array handed to the transform.
        """
        boundaries = self.trafo(y)
        # Masked elements are counted as background (modifies the transform output).
        boundaries[boundaries == self.mask_label] = self.bg_label
        n_total = float(boundaries.size)
        n_foreground = np.sum(boundaries != self.bg_label)
        if n_foreground / n_total > self.min_fraction:
            return True
        return np.random.rand() > self.p_reject