torch_em.data.datasets.electron_microscopy.cremi
CREMI is a dataset for neuron segmentation in EM.
It contains three annotated volumes from the adult fruit-fly brain. It was held as a challenge at MICCAI 2016. For details on the dataset check out https://cremi.org/.
"""CREMI is a dataset for neuron segmentation in EM.

It contains three annotated volumes from the adult fruit-fly brain.
It was held as a challenge at MICCAI 2016. For details on the dataset check out https://cremi.org/.
"""
# TODO add support for realigned volumes

import os
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import torch_em
from torch.utils.data import Dataset, DataLoader

from .. import util

# Download locations. "original" maps a sample name to the URL of its training volume,
# "realigned" is a placeholder (no URLs yet) and "defects" is the volume that is used
# as artifact source for the defect augmentations.
CREMI_URLS = {
    "original": {
        "A": "https://cremi.org/static/data/sample_A_20160501.hdf",
        "B": "https://cremi.org/static/data/sample_B_20160501.hdf",
        "C": "https://cremi.org/static/data/sample_C_20160501.hdf",
    },
    "realigned": {},
    "defects": "https://zenodo.org/record/5767036/files/sample_ABC_padded_defects.h5"
}
# Checksums passed to `util.download_source`; same layout as `CREMI_URLS`.
CHECKSUMS = {
    "original": {
        "A": "4c563d1b78acb2bcfb3ea958b6fe1533422f7f4a19f3e05b600bfa11430b510d",
        "B": "887e85521e00deead18c94a21ad71f278d88a5214c7edeed943130a1f4bb48b8",
        "C": "2874496f224d222ebc29d0e4753e8c458093e1d37bc53acd1b69b19ed1ae7052",
    },
    "realigned": {},
    "defects": "7b06ffa34733b2c32956ea5005e0cf345e7d3a27477f42f7c905701cdc947bd0"
}


def get_cremi_data(
    path: Union[os.PathLike, str],
    samples: Tuple[str, ...],
    download: bool,
    use_realigned: bool = False,
) -> List[str]:
    """Download the CREMI training data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
        download: Whether to download the data if it is not present.
        use_realigned: Use the realigned instead of the original training data.

    Returns:
        The filepaths to the training data, one per entry of `samples` and in the same order.

    Raises:
        NotImplementedError: If `use_realigned` is set, because the realigned volumes are not supported yet.
        KeyError: If `samples` contains a name without an entry in `CREMI_URLS["original"]`.
    """
    if use_realigned:
        # we need to sample batches in this case
        # sampler = torch_em.data.MinForegroundSampler(min_fraction=0.05, p_reject=.75)
        raise NotImplementedError("The realigned CREMI volumes are not supported yet.")

    urls = CREMI_URLS["original"]
    checksums = CHECKSUMS["original"]

    # Check all names up front, so that a typo in a later sample
    # does not surface only after the earlier samples were downloaded.
    samples = tuple(samples)
    unknown = [name for name in samples if name not in urls]
    if unknown:
        raise KeyError(f"Unknown CREMI sample(s) {unknown}; the available samples are {sorted(urls)}.")

    os.makedirs(path, exist_ok=True)
    data_paths = []
    for name in samples:
        data_path = os.path.join(path, f"sample{name}.h5")
        # CREMI SSL certificates expired, so we need to disable verification
        util.download_source(data_path, urls[name], download, checksums[name], verify=False)
        data_paths.append(data_path)
    return data_paths


def get_cremi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    samples: Tuple[str, ...] = ("A", "B", "C"),
    use_realigned: bool = False,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    rois: Optional[Dict[str, Any]] = {},
    defect_augmentation_kwargs: Optional[Dict[str, Any]] = {
        "p_drop_slice": 0.025,
        "p_low_contrast": 0.025,
        "p_deform_slice": 0.0,
        "deformation_mode": "compress",
    },
    **kwargs,
) -> Dataset:
    """Get the CREMI dataset for the segmentation of neurons in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
        use_realigned: Use the realigned instead of the original training data.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        rois: The regions of interest to use for the samples, as a dictionary that maps
            sample names to the respective region. Samples without an entry use the full volume.
            Passing None is equivalent to passing an empty dictionary.
        defect_augmentation_kwargs: Keyword arguments for defect augmentations.
            Pass None to disable the defect augmentations. If the dictionary has no
            'artifact_source' entry, the CREMI defect volume is downloaded and used as source.
            The dictionary that is passed is not modified.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert len(patch_shape) == 3, f"Expected a 3D patch shape, got {patch_shape}."
    if rois is None:
        rois = {}
    assert isinstance(rois, dict), f"Expected rois to be a dictionary, got {type(rois)}."

    data_paths = get_cremi_data(path, samples, download, use_realigned)
    data_rois = [rois.get(name, np.s_[:, :, :]) for name in samples]

    raw_key = "volumes/raw"
    label_key = "volumes/labels/neuron_ids"

    # defect augmentations
    if defect_augmentation_kwargs is not None:
        # Work on a copy: the argument may be the shared default dictionary or owned by the caller.
        # Updating it in place would carry the artifact source, which is specific to this
        # path and patch shape, over into later calls.
        defect_augmentation_kwargs = dict(defect_augmentation_kwargs)

        if "artifact_source" not in defect_augmentation_kwargs:
            # download the defect volume
            url = CREMI_URLS["defects"]
            checksum = CHECKSUMS["defects"]
            defect_path = os.path.join(path, "cremi_defects.h5")
            util.download_source(defect_path, url, download, checksum)
            # The artifacts are sampled as single slices with the in-plane shape of the patches.
            defect_patch_shape = (1,) + tuple(patch_shape[1:])
            defect_augmentation_kwargs["artifact_source"] = torch_em.transform.get_artifact_source(
                defect_path, defect_patch_shape,
                min_mask_fraction=0.75,
                raw_key="defect_sections/raw",
                mask_key="defect_sections/mask",
            )

        raw_transform = torch_em.transform.get_raw_transform(
            augmentation1=torch_em.transform.EMDefectAugmentation(**defect_augmentation_kwargs)
        )
        kwargs = util.update_kwargs(kwargs, "raw_transform", raw_transform)

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )

    # Raw data and labels are stored in the same files, hence `data_paths` is passed twice.
    return torch_em.default_segmentation_dataset(
        data_paths, raw_key, data_paths, label_key, patch_shape, rois=data_rois, **kwargs
    )


def get_cremi_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    samples: Tuple[str, ...] = ("A", "B", "C"),
    use_realigned: bool = False,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    rois: Optional[Dict[str, Any]] = {},
    defect_augmentation_kwargs: Optional[Dict[str, Any]] = {
        "p_drop_slice": 0.025,
        "p_low_contrast": 0.025,
        "p_deform_slice": 0.0,
        "deformation_mode": "compress",
    },
    **kwargs,
) -> DataLoader:
    """Get the DataLoader for EM neuron segmentation in the CREMI dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
        use_realigned: Use the realigned instead of the original training data.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        rois: The regions of interest to use for the samples.
        defect_augmentation_kwargs: Keyword arguments for defect augmentations.
            The dictionary that is passed is not modified.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, loader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )

    # The dataset function may insert an 'artifact_source' entry into the dictionary it is given.
    # Pass a copy, so that neither the caller's dictionary nor the shared default is modified.
    if defect_augmentation_kwargs is not None:
        defect_augmentation_kwargs = dict(defect_augmentation_kwargs)

    ds = get_cremi_dataset(
        path=path,
        patch_shape=patch_shape,
        samples=samples,
        use_realigned=use_realigned,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        rois=rois,
        defect_augmentation_kwargs=defect_augmentation_kwargs,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
# Download locations. "original" maps a sample name to the URL of its training volume,
# "realigned" is a placeholder (no URLs yet) and "defects" is the URL of the defect volume.
CREMI_URLS = {
    "original": {
        "A": "https://cremi.org/static/data/sample_A_20160501.hdf",
        "B": "https://cremi.org/static/data/sample_B_20160501.hdf",
        "C": "https://cremi.org/static/data/sample_C_20160501.hdf",
    },
    "realigned": {},
    "defects": "https://zenodo.org/record/5767036/files/sample_ABC_padded_defects.h5",
}
# Checksums for the downloads, with the same layout as the URL table:
# one entry per original sample, an empty placeholder for the realigned data and one for the defect volume.
CHECKSUMS = {
    "original": {
        "A": "4c563d1b78acb2bcfb3ea958b6fe1533422f7f4a19f3e05b600bfa11430b510d",
        "B": "887e85521e00deead18c94a21ad71f278d88a5214c7edeed943130a1f4bb48b8",
        "C": "2874496f224d222ebc29d0e4753e8c458093e1d37bc53acd1b69b19ed1ae7052",
    },
    "realigned": {},
    "defects": "7b06ffa34733b2c32956ea5005e0cf345e7d3a27477f42f7c905701cdc947bd0",
}
def
get_cremi_data( path: Union[os.PathLike, str], samples: Tuple[str], download: bool, use_realigned: bool = False) -> List[str]:
def get_cremi_data(
    path: Union[os.PathLike, str],
    samples: Tuple[str, ...],
    download: bool,
    use_realigned: bool = False,
) -> List[str]:
    """Download the CREMI training data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
        download: Whether to download the data if it is not present.
        use_realigned: Use the realigned instead of the original training data.

    Returns:
        The filepaths to the training data, one per entry of `samples` and in the same order.

    Raises:
        NotImplementedError: If `use_realigned` is set, because the realigned volumes are not supported yet.
        KeyError: If `samples` contains a name without an entry in `CREMI_URLS["original"]`.
    """
    if use_realigned:
        # we need to sample batches in this case
        # sampler = torch_em.data.MinForegroundSampler(min_fraction=0.05, p_reject=.75)
        raise NotImplementedError("The realigned CREMI volumes are not supported yet.")

    urls = CREMI_URLS["original"]
    checksums = CHECKSUMS["original"]

    # Check all names up front, so that a typo in a later sample
    # does not surface only after the earlier samples were downloaded.
    samples = tuple(samples)
    unknown = [name for name in samples if name not in urls]
    if unknown:
        raise KeyError(f"Unknown CREMI sample(s) {unknown}; the available samples are {sorted(urls)}.")

    os.makedirs(path, exist_ok=True)
    data_paths = []
    for name in samples:
        data_path = os.path.join(path, f"sample{name}.h5")
        # CREMI SSL certificates expired, so we need to disable verification
        util.download_source(data_path, urls[name], download, checksums[name], verify=False)
        data_paths.append(data_path)
    return data_paths
Download the CREMI training data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
- download: Whether to download the data if it is not present.
- use_realigned: Use the realigned instead of the original training data.
Returns:
The filepaths to the training data.
def
get_cremi_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], samples: Tuple[str, ...] = ('A', 'B', 'C'), use_realigned: bool = False, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, rois: Dict[str, Any] = {}, defect_augmentation_kwargs: Dict[str, Any] = {'p_drop_slice': 0.025, 'p_low_contrast': 0.025, 'p_deform_slice': 0.0, 'deformation_mode': 'compress'}, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_cremi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    samples: Tuple[str, ...] = ("A", "B", "C"),
    use_realigned: bool = False,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    rois: Optional[Dict[str, Any]] = {},
    defect_augmentation_kwargs: Optional[Dict[str, Any]] = {
        "p_drop_slice": 0.025,
        "p_low_contrast": 0.025,
        "p_deform_slice": 0.0,
        "deformation_mode": "compress",
    },
    **kwargs,
) -> Dataset:
    """Get the CREMI dataset for the segmentation of neurons in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
        use_realigned: Use the realigned instead of the original training data.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        rois: The regions of interest to use for the samples, as a dictionary that maps
            sample names to the respective region. Samples without an entry use the full volume.
            Passing None is equivalent to passing an empty dictionary.
        defect_augmentation_kwargs: Keyword arguments for defect augmentations.
            Pass None to disable the defect augmentations. If the dictionary has no
            'artifact_source' entry, the CREMI defect volume is downloaded and used as source.
            The dictionary that is passed is not modified.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert len(patch_shape) == 3, f"Expected a 3D patch shape, got {patch_shape}."
    if rois is None:
        rois = {}
    assert isinstance(rois, dict), f"Expected rois to be a dictionary, got {type(rois)}."

    data_paths = get_cremi_data(path, samples, download, use_realigned)
    data_rois = [rois.get(name, np.s_[:, :, :]) for name in samples]

    raw_key = "volumes/raw"
    label_key = "volumes/labels/neuron_ids"

    # defect augmentations
    if defect_augmentation_kwargs is not None:
        # Work on a copy: the argument may be the shared default dictionary or owned by the caller.
        # Updating it in place would carry the artifact source, which is specific to this
        # path and patch shape, over into later calls.
        defect_augmentation_kwargs = dict(defect_augmentation_kwargs)

        if "artifact_source" not in defect_augmentation_kwargs:
            # download the defect volume
            url = CREMI_URLS["defects"]
            checksum = CHECKSUMS["defects"]
            defect_path = os.path.join(path, "cremi_defects.h5")
            util.download_source(defect_path, url, download, checksum)
            # The artifacts are sampled as single slices with the in-plane shape of the patches.
            defect_patch_shape = (1,) + tuple(patch_shape[1:])
            defect_augmentation_kwargs["artifact_source"] = torch_em.transform.get_artifact_source(
                defect_path, defect_patch_shape,
                min_mask_fraction=0.75,
                raw_key="defect_sections/raw",
                mask_key="defect_sections/mask",
            )

        raw_transform = torch_em.transform.get_raw_transform(
            augmentation1=torch_em.transform.EMDefectAugmentation(**defect_augmentation_kwargs)
        )
        kwargs = util.update_kwargs(kwargs, "raw_transform", raw_transform)

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )

    # Raw data and labels are stored in the same files, hence `data_paths` is passed twice.
    return torch_em.default_segmentation_dataset(
        data_paths, raw_key, data_paths, label_key, patch_shape, rois=data_rois, **kwargs
    )
Get the CREMI dataset for the segmentation of neurons in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
- use_realigned: Use the realigned instead of the original training data.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- rois: The regions of interest to use for the samples.
- defect_augmentation_kwargs: Keyword arguments for defect augmentations.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_cremi_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, samples: Tuple[str, ...] = ('A', 'B', 'C'), use_realigned: bool = False, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, rois: Dict[str, Any] = {}, defect_augmentation_kwargs: Dict[str, Any] = {'p_drop_slice': 0.025, 'p_low_contrast': 0.025, 'p_deform_slice': 0.0, 'deformation_mode': 'compress'}, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_cremi_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    samples: Tuple[str, ...] = ("A", "B", "C"),
    use_realigned: bool = False,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    rois: Optional[Dict[str, Any]] = {},
    defect_augmentation_kwargs: Optional[Dict[str, Any]] = {
        "p_drop_slice": 0.025,
        "p_low_contrast": 0.025,
        "p_deform_slice": 0.0,
        "deformation_mode": "compress",
    },
    **kwargs,
) -> DataLoader:
    """Get the DataLoader for EM neuron segmentation in the CREMI dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
        use_realigned: Use the realigned instead of the original training data.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        rois: The regions of interest to use for the samples.
        defect_augmentation_kwargs: Keyword arguments for defect augmentations.
            The dictionary that is passed is not modified.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, loader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )

    # The dataset function may insert an 'artifact_source' entry into the dictionary it is given.
    # Pass a copy, so that neither the caller's dictionary nor the shared default is modified.
    if defect_augmentation_kwargs is not None:
        defect_augmentation_kwargs = dict(defect_augmentation_kwargs)

    ds = get_cremi_dataset(
        path=path,
        patch_shape=patch_shape,
        samples=samples,
        use_realigned=use_realigned,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        rois=rois,
        defect_augmentation_kwargs=defect_augmentation_kwargs,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
Get the DataLoader for EM neuron segmentation in the CREMI dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
- use_realigned: Use the realigned instead of the original training data.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- rois: The regions of interest to use for the samples.
- defect_augmentation_kwargs: Keyword arguments for defect augmentations.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.