torch_em.data.datasets.electron_microscopy.cremi

CREMI is a dataset for neuron segmentation in EM.

It contains three annotated volumes from the adult fruit-fly brain. It was held as a challenge at MICCAI 2016. For details on the dataset check out https://cremi.org/.
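
A minimal usage sketch (the download folder and patch shape below are illustrative assumptions, not values prescribed by the dataset):

from torch_em.data.datasets.electron_microscopy.cremi import get_cremi_loader

# Download sample A if needed and build a 3D training loader
# with the default defect augmentations and instance labels as target.
loader = get_cremi_loader(
    path="./data/cremi",         # hypothetical download folder
    patch_shape=(32, 256, 256),  # (z, y, x) patch size
    batch_size=1,
    samples=("A",),
    download=True,
)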

  1"""CREMI is a dataset for neuron segmentation in EM.
  2
  3It contains three annotated volumes from the adult fruit-fly brain.
  4It was held as a challenge at MICCAI 2016. For details on the dataset check out https://cremi.org/.
  5"""
# TODO add support for realigned volumes

import os
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import torch_em
from torch.utils.data import Dataset, DataLoader

from .. import util

CREMI_URLS = {
    "original": {
        "A": "https://cremi.org/static/data/sample_A_20160501.hdf",
        "B": "https://cremi.org/static/data/sample_B_20160501.hdf",
        "C": "https://cremi.org/static/data/sample_C_20160501.hdf",
    },
    "realigned": {},
    "defects": "https://zenodo.org/record/5767036/files/sample_ABC_padded_defects.h5"
}
CHECKSUMS = {
    "original": {
        "A": "4c563d1b78acb2bcfb3ea958b6fe1533422f7f4a19f3e05b600bfa11430b510d",
        "B": "887e85521e00deead18c94a21ad71f278d88a5214c7edeed943130a1f4bb48b8",
        "C": "2874496f224d222ebc29d0e4753e8c458093e1d37bc53acd1b69b19ed1ae7052",
    },
    "realigned": {},
    "defects": "7b06ffa34733b2c32956ea5005e0cf345e7d3a27477f42f7c905701cdc947bd0"
}


def get_cremi_data(
    path: Union[os.PathLike, str],
    samples: Tuple[str, ...],
    download: bool,
    use_realigned: bool = False,
) -> List[str]:
 43    """Download the CREMI training data.
 44
 45    Args:
 46        path: Filepath to a folder where the downloaded data will be saved.
 47        samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
 48        download: Whether to download the data if it is not present.
 49        use_realigned: Use the realigned instead of the original training data.
 50
 51    Returns:
 52        The filepaths to the training data.
 53    """
 54    if use_realigned:
 55        # we need to sample batches in this case
 56        # sampler = torch_em.data.MinForegroundSampler(min_fraction=0.05, p_reject=.75)
 57        raise NotImplementedError
 58    else:
 59        urls = CREMI_URLS["original"]
 60        checksums = CHECKSUMS["original"]
 61
 62    os.makedirs(path, exist_ok=True)
 63    data_paths = []
 64    for name in samples:
 65        url = urls[name]
 66        checksum = checksums[name]
 67        data_path = os.path.join(path, f"sample{name}.h5")
 68        # CREMI SSL certificates expired, so we need to disable verification
 69        util.download_source(data_path, url, download, checksum, verify=False)
 70        data_paths.append(data_path)
 71    return data_paths
 72
 73
def get_cremi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    samples: Tuple[str, ...] = ("A", "B", "C"),
    use_realigned: bool = False,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    rois: Dict[str, Any] = {},
    defect_augmentation_kwargs: Dict[str, Any] = {
        "p_drop_slice": 0.025,
        "p_low_contrast": 0.025,
        "p_deform_slice": 0.0,
        "deformation_mode": "compress",
    },
    **kwargs,
) -> Dataset:
 91    """Get the CREMI dataset for the segmentation of neurons in EM.
 92
 93    Args:
 94        path: Filepath to a folder where the downloaded data will be saved.
 95        patch_shape: The patch shape to use for training.
 96        samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
 97        use_realigned: Use the realigned instead of the original training data.
 98        download: Whether to download the data if it is not present.
 99        offsets: Offset values for affinity computation used as target.
100        boundaries: Whether to compute boundaries as the target.
101        rois: The region of interests to use for the samples.
102        defect_augmentation_kwargs: Keyword arguments for defect augmentations.
103        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
104
105    Returns:
106       The segmentation dataset.
107    """
108    assert len(patch_shape) == 3
109    if rois is not None:
110        assert isinstance(rois, dict)
111
112    data_paths = get_cremi_data(path, samples, download, use_realigned)
113    data_rois = [rois.get(name, np.s_[:, :, :]) for name in samples]
114
115    if defect_augmentation_kwargs is not None and "artifact_source" not in defect_augmentation_kwargs:
116        # download the defect volume
117        url = CREMI_URLS["defects"]
118        checksum = CHECKSUMS["defects"]
119        defect_path = os.path.join(path, "cremi_defects.h5")
120        util.download_source(defect_path, url, download, checksum)
121        defect_patch_shape = (1,) + tuple(patch_shape[1:])
122        artifact_source = torch_em.transform.get_artifact_source(defect_path, defect_patch_shape,
123                                                                 min_mask_fraction=0.75,
124                                                                 raw_key="defect_sections/raw",
125                                                                 mask_key="defect_sections/mask")
126        defect_augmentation_kwargs.update({"artifact_source": artifact_source})
127
128    raw_key = "volumes/raw"
129    label_key = "volumes/labels/neuron_ids"
130
131    # defect augmentations
132    if defect_augmentation_kwargs is not None:
133        raw_transform = torch_em.transform.get_raw_transform(
134            augmentation1=torch_em.transform.EMDefectAugmentation(**defect_augmentation_kwargs)
135        )
136        kwargs = util.update_kwargs(kwargs, "raw_transform", raw_transform)
137
138    kwargs, _ = util.add_instance_label_transform(
139        kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
140    )
141
142    return torch_em.default_segmentation_dataset(
143        data_paths, raw_key, data_paths, label_key, patch_shape, rois=data_rois, **kwargs
144    )
145
146
def get_cremi_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    samples: Tuple[str, ...] = ("A", "B", "C"),
    use_realigned: bool = False,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    rois: Dict[str, Any] = {},
    defect_augmentation_kwargs: Dict[str, Any] = {
        "p_drop_slice": 0.025,
        "p_low_contrast": 0.025,
        "p_deform_slice": 0.0,
        "deformation_mode": "compress",
    },
    **kwargs,
) -> DataLoader:
165    """Get the DataLoader for EM neuron segmentation in the CREMI dataset.
166
167    Args:
168        path: Filepath to a folder where the downloaded data will be saved.
169        patch_shape: The patch shape to use for training.
170        batch_size: The batch size for training.
171        samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
172        use_realigned: Use the realigned instead of the original training data.
173        download: Whether to download the data if it is not present.
174        offsets: Offset values for affinity computation used as target.
175        boundaries: Whether to compute boundaries as the target.
176        rois: The region of interests to use for the samples.
177        defect_augmentation_kwargs: Keyword arguments for defect augmentations.
178        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
179
180    Returns:
181        The DataLoader.
182    """
183    dataset_kwargs, loader_kwargs = util.split_kwargs(
184        torch_em.default_segmentation_dataset, **kwargs
185    )
186    ds = get_cremi_dataset(
187        path=path,
188        patch_shape=patch_shape,
189        samples=samples,
190        use_realigned=use_realigned,
191        download=download,
192        offsets=offsets,
193        boundaries=boundaries,
194        rois=rois,
195        defect_augmentation_kwargs=defect_augmentation_kwargs,
196        **dataset_kwargs,
197    )
198    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
CREMI_URLS = {'original': {'A': 'https://cremi.org/static/data/sample_A_20160501.hdf', 'B': 'https://cremi.org/static/data/sample_B_20160501.hdf', 'C': 'https://cremi.org/static/data/sample_C_20160501.hdf'}, 'realigned': {}, 'defects': 'https://zenodo.org/record/5767036/files/sample_ABC_padded_defects.h5'}
CHECKSUMS = {'original': {'A': '4c563d1b78acb2bcfb3ea958b6fe1533422f7f4a19f3e05b600bfa11430b510d', 'B': '887e85521e00deead18c94a21ad71f278d88a5214c7edeed943130a1f4bb48b8', 'C': '2874496f224d222ebc29d0e4753e8c458093e1d37bc53acd1b69b19ed1ae7052'}, 'realigned': {}, 'defects': '7b06ffa34733b2c32956ea5005e0cf345e7d3a27477f42f7c905701cdc947bd0'}
def get_cremi_data( path: Union[os.PathLike, str], samples: Tuple[str, ...], download: bool, use_realigned: bool = False) -> List[str]:

Download the CREMI training data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
  • download: Whether to download the data if it is not present.
  • use_realigned: Use the realigned instead of the original training data.
Returns:

The filepaths to the training data.
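
For illustration, a sketch that only downloads the raw HDF5 volumes (the target folder is an assumption):

from torch_em.data.datasets.electron_microscopy.cremi import get_cremi_data

# Fetch samples A and B (if not already present) and return their file paths,
# e.g. ['./data/cremi/sampleA.h5', './data/cremi/sampleB.h5'].
data_paths = get_cremi_data("./data/cremi", samples=("A", "B"), download=True)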

def get_cremi_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], samples: Tuple[str, ...] = ('A', 'B', 'C'), use_realigned: bool = False, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, rois: Dict[str, Any] = {}, defect_augmentation_kwargs: Dict[str, Any] = {'p_drop_slice': 0.025, 'p_low_contrast': 0.025, 'p_deform_slice': 0.0, 'deformation_mode': 'compress'}, **kwargs) -> torch.utils.data.dataset.Dataset:

Get the CREMI dataset for the segmentation of neurons in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
  • use_realigned: Use the realigned instead of the original training data.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • rois: The regions of interest to use for the samples.
  • defect_augmentation_kwargs: Keyword arguments for defect augmentations.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.
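
As a sketch of how the target options and per-sample ROIs can be combined (the ROI below is an illustrative assumption):

import numpy as np
from torch_em.data.datasets.electron_microscopy.cremi import get_cremi_dataset

# Train on boundary maps instead of instance labels and restrict
# sample A to its first 50 slices via a per-sample ROI.
ds = get_cremi_dataset(
    path="./data/cremi",
    patch_shape=(32, 256, 256),
    samples=("A", "B", "C"),
    boundaries=True,
    rois={"A": np.s_[:50, :, :]},
    download=True,
)

Passing boundaries=True adds a boundary label transform; passing offsets instead would compute affinities from the instance labels.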

def get_cremi_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, samples: Tuple[str, ...] = ('A', 'B', 'C'), use_realigned: bool = False, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, rois: Dict[str, Any] = {}, defect_augmentation_kwargs: Dict[str, Any] = {'p_drop_slice': 0.025, 'p_low_contrast': 0.025, 'p_deform_slice': 0.0, 'deformation_mode': 'compress'}, **kwargs) -> torch.utils.data.dataloader.DataLoader:

Get the DataLoader for EM neuron segmentation in the CREMI dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • samples: The CREMI samples to use. The available samples are 'A', 'B', 'C'.
  • use_realigned: Use the realigned instead of the original training data.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • rois: The regions of interest to use for the samples.
  • defect_augmentation_kwargs: Keyword arguments for defect augmentations.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.
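
A sketch with affinity targets and DataLoader options (the offsets and worker count are illustrative assumptions; as documented above, unknown keyword arguments are split between the dataset and the PyTorch DataLoader):

from torch_em.data.datasets.electron_microscopy.cremi import get_cremi_loader

# Nearest-neighbor 3D offsets for affinity targets.
offsets = [[-1, 0, 0], [0, -1, 0], [0, 0, -1]]

loader = get_cremi_loader(
    path="./data/cremi",
    patch_shape=(32, 256, 256),
    batch_size=2,
    offsets=offsets,
    download=True,
    num_workers=4,   # forwarded to the PyTorch DataLoader
    shuffle=True,    # forwarded to the PyTorch DataLoader
)

x, y = next(iter(loader))  # raw patches and affinity targets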