torch_em.data.datasets.electron_microscopy.cremi
CREMI is a dataset for neuron segmentation in EM.
It contains three annotated volumes from the adult fruit-fly brain. It was held as a challenge at MICCAI 2016. For details on the dataset check out https://cremi.org/. Please cite the challenge if you use the dataset in your research.
1"""CREMI is a dataset for neuron segmentation in EM. 2 3It contains three annotated volumes from the adult fruit-fly brain. 4It was held as a challenge at MICCAI 2016. For details on the dataset check out https://cremi.org/. 5Please cite the challenge if you use the dataset in your research. 6""" 7# TODO add support for realigned volumes 8 9import os 10from typing import Any, Dict, List, Optional, Tuple, Union 11 12import numpy as np 13 14from torch.utils.data import Dataset, DataLoader 15 16import torch_em 17 18from .. import util 19 20 21CREMI_URLS = { 22 "original": { 23 "A": "https://cremi.org/static/data/sample_A_20160501.hdf", 24 "B": "https://cremi.org/static/data/sample_B_20160501.hdf", 25 "C": "https://cremi.org/static/data/sample_C_20160501.hdf", 26 }, 27 "realigned": {}, 28 "defects": "https://zenodo.org/record/5767036/files/sample_ABC_padded_defects.h5" 29} 30CHECKSUMS = { 31 "original": { 32 "A": "4c563d1b78acb2bcfb3ea958b6fe1533422f7f4a19f3e05b600bfa11430b510d", 33 "B": "887e85521e00deead18c94a21ad71f278d88a5214c7edeed943130a1f4bb48b8", 34 "C": "2874496f224d222ebc29d0e4753e8c458093e1d37bc53acd1b69b19ed1ae7052", 35 }, 36 "realigned": {}, 37 "defects": "7b06ffa34733b2c32956ea5005e0cf345e7d3a27477f42f7c905701cdc947bd0" 38} 39 40 41def get_cremi_data(path: Union[os.PathLike, str], samples: Tuple[str], download: bool, use_realigned: bool = False): 42 """Download the CREMI training data. 43 44 Args: 45 path: Filepath to a folder where the downloaded data will be saved. 46 samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'. 47 download: Whether to download the data if it is not present. 48 use_realigned: Use the realigned instead of the original training data. 49 """ 50 if use_realigned: 51 # we need to sample batches in this case 52 # sampler = torch_em.data.MinForegroundSampler(min_fraction=0.05, p_reject=.75) 53 raise NotImplementedError 54 else: 55 urls = CREMI_URLS["original"] 56 checksums = CHECKSUMS["original"] 57 58 os.makedirs(path, exist_ok=True) 59 for name in samples: 60 url = urls[name] 61 checksum = checksums[name] 62 data_path = os.path.join(path, f"sample{name}.h5") 63 # CREMI SSL certificates expired, so we need to disable verification 64 util.download_source(data_path, url, download, checksum, verify=False) 65 66 67def get_cremi_paths( 68 path: Union[os.PathLike, str], 69 samples: Tuple[str, ...] = ("A", "B", "C"), 70 use_realigned: bool = False, 71 download: bool = False 72) -> List[str]: 73 """Get paths to the CREMI data. 74 75 Args: 76 path: Filepath to a folder where the downloaded data will be saved. 77 samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'. 78 use_realigned: Use the realigned instead of the original training data. 79 download: Whether to download the data if it is not present. 80 81 Returns: 82 The filepaths to the training data. 83 """ 84 get_cremi_data(path, samples, download, use_realigned) 85 data_paths = [os.path.join(path, f"sample{name}.h5") for name in samples] 86 return data_paths 87 88 89def get_cremi_dataset( 90 path: Union[os.PathLike, str], 91 patch_shape: Tuple[int, int, int], 92 samples: Tuple[str, ...] 
= ("A", "B", "C"), 93 use_realigned: bool = False, 94 download: bool = False, 95 offsets: Optional[List[List[int]]] = None, 96 boundaries: bool = False, 97 rois: Dict[str, Any] = {}, 98 defect_augmentation_kwargs: Dict[str, Any] = { 99 "p_drop_slice": 0.025, 100 "p_low_contrast": 0.025, 101 "p_deform_slice": 0.0, 102 "deformation_mode": "compress", 103 }, 104 **kwargs, 105) -> Dataset: 106 """Get the CREMI dataset for the segmentation of neurons in EM. 107 108 Args: 109 path: Filepath to a folder where the downloaded data will be saved. 110 patch_shape: The patch shape to use for training. 111 samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'. 112 use_realigned: Use the realigned instead of the original training data. 113 download: Whether to download the data if it is not present. 114 offsets: Offset values for affinity computation used as target. 115 boundaries: Whether to compute boundaries as the target. 116 rois: The region of interests to use for the samples. 117 defect_augmentation_kwargs: Keyword arguments for defect augmentations. 118 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 119 120 Returns: 121 The segmentation dataset. 122 """ 123 assert len(patch_shape) == 3 124 if rois is not None: 125 assert isinstance(rois, dict) 126 127 data_paths = get_cremi_paths(path, samples, use_realigned, download) 128 data_rois = [rois.get(name, np.s_[:, :, :]) for name in samples] 129 130 if defect_augmentation_kwargs is not None and "artifact_source" not in defect_augmentation_kwargs: 131 # download the defect volume 132 url = CREMI_URLS["defects"] 133 checksum = CHECKSUMS["defects"] 134 defect_path = os.path.join(path, "cremi_defects.h5") 135 util.download_source(defect_path, url, download, checksum) 136 defect_patch_shape = (1,) + tuple(patch_shape[1:]) 137 artifact_source = torch_em.transform.get_artifact_source( 138 defect_path, defect_patch_shape, 139 min_mask_fraction=0.75, 140 raw_key="defect_sections/raw", 141 mask_key="defect_sections/mask" 142 ) 143 defect_augmentation_kwargs.update({"artifact_source": artifact_source}) 144 145 # defect augmentations 146 if defect_augmentation_kwargs is not None: 147 raw_transform = torch_em.transform.get_raw_transform( 148 augmentation1=torch_em.transform.EMDefectAugmentation(**defect_augmentation_kwargs) 149 ) 150 kwargs = util.update_kwargs(kwargs, "raw_transform", raw_transform) 151 152 kwargs, _ = util.add_instance_label_transform( 153 kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets 154 ) 155 156 return torch_em.default_segmentation_dataset( 157 raw_paths=data_paths, 158 raw_key="volumes/raw", 159 label_paths=data_paths, 160 label_key="volumes/labels/neuron_ids", 161 patch_shape=patch_shape, 162 rois=data_rois, 163 **kwargs 164 ) 165 166 167def get_cremi_loader( 168 path: Union[os.PathLike, str], 169 patch_shape: Tuple[int, int, int], 170 batch_size: int, 171 samples: Tuple[str, ...] = ("A", "B", "C"), 172 use_realigned: bool = False, 173 download: bool = False, 174 offsets: Optional[List[List[int]]] = None, 175 boundaries: bool = False, 176 rois: Dict[str, Any] = {}, 177 defect_augmentation_kwargs: Dict[str, Any] = { 178 "p_drop_slice": 0.025, 179 "p_low_contrast": 0.025, 180 "p_deform_slice": 0.0, 181 "deformation_mode": "compress", 182 }, 183 **kwargs, 184) -> DataLoader: 185 """Get the DataLoader for EM neuron segmentation in the CREMI dataset. 186 187 Args: 188 path: Filepath to a folder where the downloaded data will be saved. 
189 patch_shape: The patch shape to use for training. 190 batch_size: The batch size for training. 191 samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'. 192 use_realigned: Use the realigned instead of the original training data. 193 download: Whether to download the data if it is not present. 194 offsets: Offset values for affinity computation used as target. 195 boundaries: Whether to compute boundaries as the target. 196 rois: The region of interests to use for the samples. 197 defect_augmentation_kwargs: Keyword arguments for defect augmentations. 198 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 199 200 Returns: 201 The DataLoader. 202 """ 203 dataset_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) 204 ds = get_cremi_dataset( 205 path=path, 206 patch_shape=patch_shape, 207 samples=samples, 208 use_realigned=use_realigned, 209 download=download, 210 offsets=offsets, 211 boundaries=boundaries, 212 rois=rois, 213 defect_augmentation_kwargs=defect_augmentation_kwargs, 214 **dataset_kwargs, 215 ) 216 return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
```python
CREMI_URLS = {
    "original": {
        "A": "https://cremi.org/static/data/sample_A_20160501.hdf",
        "B": "https://cremi.org/static/data/sample_B_20160501.hdf",
        "C": "https://cremi.org/static/data/sample_C_20160501.hdf",
    },
    "realigned": {},
    "defects": "https://zenodo.org/record/5767036/files/sample_ABC_padded_defects.h5"
}
```
```python
CHECKSUMS = {
    "original": {
        "A": "4c563d1b78acb2bcfb3ea958b6fe1533422f7f4a19f3e05b600bfa11430b510d",
        "B": "887e85521e00deead18c94a21ad71f278d88a5214c7edeed943130a1f4bb48b8",
        "C": "2874496f224d222ebc29d0e4753e8c458093e1d37bc53acd1b69b19ed1ae7052",
    },
    "realigned": {},
    "defects": "7b06ffa34733b2c32956ea5005e0cf345e7d3a27477f42f7c905701cdc947bd0"
}
```
def get_cremi_data(path: Union[os.PathLike, str], samples: Tuple[str], download: bool, use_realigned: bool = False):
Download the CREMI training data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
- download: Whether to download the data if it is not present.
- use_realigned: Use the realigned instead of the original training data.
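A minimal usage sketch; the local folder "./data/cremi" and the sample selection are example values, not defaults of the function:

```python
from torch_em.data.datasets.electron_microscopy.cremi import get_cremi_data

# Download samples A and B into the target folder if they are not present yet.
get_cremi_data(path="./data/cremi", samples=("A", "B"), download=True)
```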
def get_cremi_paths(path: Union[os.PathLike, str], samples: Tuple[str, ...] = ("A", "B", "C"), use_realigned: bool = False, download: bool = False) -> List[str]:
Get paths to the CREMI data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
- use_realigned: Use the realigned instead of the original training data.
- download: Whether to download the data if it is not present.
Returns:
The filepaths to the training data.
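For example (the target folder is again just an example location):

```python
from torch_em.data.datasets.electron_microscopy.cremi import get_cremi_paths

# Download (if needed) and collect the file paths for samples A and C,
# e.g. ["./data/cremi/sampleA.h5", "./data/cremi/sampleC.h5"].
paths = get_cremi_paths("./data/cremi", samples=("A", "C"), download=True)
```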
def get_cremi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    samples: Tuple[str, ...] = ("A", "B", "C"),
    use_realigned: bool = False,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    rois: Dict[str, Any] = {},
    defect_augmentation_kwargs: Dict[str, Any] = {"p_drop_slice": 0.025, "p_low_contrast": 0.025, "p_deform_slice": 0.0, "deformation_mode": "compress"},
    **kwargs,
) -> torch.utils.data.dataset.Dataset:
Get the CREMI dataset for the segmentation of neurons in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
- use_realigned: Use the realigned instead of the original training data.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- rois: The regions of interest to use for the samples.
- defect_augmentation_kwargs: Keyword arguments for defect augmentations.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
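A usage sketch for the dataset; the path, patch shape, and the ROI for sample A are example values:

```python
import numpy as np

from torch_em.data.datasets.electron_microscopy.cremi import get_cremi_dataset

# Build a 3D dataset with boundary targets. The ROI restricts sample A to its
# first 50 slices; samples B and C are used in full.
dataset = get_cremi_dataset(
    path="./data/cremi",
    patch_shape=(32, 256, 256),
    samples=("A", "B", "C"),
    download=True,
    boundaries=True,
    rois={"A": np.s_[:50, :, :]},
)
```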
def get_cremi_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    samples: Tuple[str, ...] = ("A", "B", "C"),
    use_realigned: bool = False,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    rois: Dict[str, Any] = {},
    defect_augmentation_kwargs: Dict[str, Any] = {"p_drop_slice": 0.025, "p_low_contrast": 0.025, "p_deform_slice": 0.0, "deformation_mode": "compress"},
    **kwargs,
) -> torch.utils.data.dataloader.DataLoader:
Get the DataLoader for EM neuron segmentation in the CREMI dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
- use_realigned: Use the realigned instead of the original training data.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- rois: The regions of interest to use for the samples.
- defect_augmentation_kwargs: Keyword arguments for defect augmentations.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.
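A training-oriented sketch; the patch shape, affinity offsets, and loader settings are example values, and `num_workers`/`shuffle` rely on the documented routing of extra keyword arguments to the PyTorch DataLoader:

```python
from torch_em.data.datasets.electron_microscopy.cremi import get_cremi_loader

# Affinity targets for the direct 3D neighbor offsets; extra keyword arguments
# are split between the dataset and the PyTorch DataLoader.
loader = get_cremi_loader(
    path="./data/cremi",
    patch_shape=(32, 256, 256),
    batch_size=1,
    download=True,
    offsets=[[-1, 0, 0], [0, -1, 0], [0, 0, -1]],
    num_workers=4,  # forwarded to the DataLoader
    shuffle=True,   # forwarded to the DataLoader
)

# The loader yields (raw, target) batches that can be fed to a 3D segmentation model.
x, y = next(iter(loader))
print(x.shape, y.shape)
```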