torch_em.data.datasets.electron_microscopy.snemi

SNEMI is a dataset for neuron segmentation in EM. It contains annotated volumes from the mouse brain.

The data is part of the publication https://doi.org/10.1016/j.cell.2015.06.054. Please cite it if you use this dataset for a publication.

  1"""SNEMI is a dataset for neuron segmentation in EM.
  2It contains an annotated volumes from the mouse brain.
  3
  4The data is part of the publication https://doi.org/10.1016/j.cell.2015.06.054.
  5Please cite it if you use this dataset for a publication.
  6"""
  7
  8import os
  9from typing import List, Optional, Union, Tuple
 10
 11from torch.utils.data import Dataset, DataLoader
 12
 13import torch_em
 14
 15from .. import util
 16
 17
 18SNEMI_URLS = {
 19    "train": "https://oc.embl.de/index.php/s/43iMotlXPyAB39z/download",
 20    "test": "https://oc.embl.de/index.php/s/aRhphk35H23De2s/download"
 21}
 22CHECKSUMS = {
 23    "train": "5b130a24d9eb23d972fede0f1a403bc05f6808b361cfa22eff23b930b12f0615",
 24    "test": "3df3920a0ddec6897105845f842b2665d37a47c2d1b96d4f4565682e315a59fa"
 25}
 26
 27
 28def get_snemi_data(path: Union[os.PathLike, str], sample: str, download: bool = False):
 29    """Download the SNEMI training data.
 30
 31    Args:
 32        path: Filepath to a folder where the downloaded data will be saved.
 33        sample: The sample to download, either 'train' or 'test'.
 34        download: Whether to download the data if it is not present.
 35    """
 36    os.makedirs(path, exist_ok=True)
 37    data_path = os.path.join(path, f"snemi_{sample}.h5")
 38    util.download_source(data_path, SNEMI_URLS[sample], download, CHECKSUMS[sample])
 39
 40
 41def get_snemi_paths(path: Union[os.PathLike, str], sample: str, download: bool = False) -> str:
 42    """Get path to the SNEMI data.
 43
 44    Args:
 45        path: Filepath to a folder where the downloaded data is saved.
 46        sample: The sample to download, either 'train' or 'test'.
 47        download: Whether to download the data if it is not present.
 48
 49    Returns:
 50        The filepath for the stored data.
 51    """
 52    get_snemi_data(path, sample, download)
 53    data_path = os.path.join(path, f"snemi_{sample}.h5")
 54    assert os.path.exists(data_path), data_path
 55    return data_path
 56
 57
 58def get_snemi_dataset(
 59    path: Union[os.PathLike, str],
 60    patch_shape: Tuple[int, int, int],
 61    sample: str = "train",
 62    download: bool = False,
 63    offsets: Optional[List[List[int]]] = None,
 64    boundaries: bool = False,
 65    **kwargs,
 66) -> Dataset:
 67    """Get the SNEMI dataset for the segmentation of neurons in EM.
 68
 69    Args:
 70        path: Filepath to a folder where the downloaded data will be saved.
 71        patch_shape: The patch shape to use for training.
 72        sample: The sample to download, either 'train' or 'test'.
 73        download: Whether to download the data if it is not present.
 74        offsets: Offset values for affinity computation used as target.
 75        boundaries: Whether to compute boundaries as the target.
 76        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 77
 78    Returns:
 79       The segmentation dataset.
 80    """
 81    assert len(patch_shape) == 3
 82
 83    data_path = get_snemi_paths(path, sample, download)
 84
 85    kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)
 86    kwargs, _ = util.add_instance_label_transform(
 87        kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
 88    )
 89
 90    return torch_em.default_segmentation_dataset(
 91        raw_paths=data_path,
 92        raw_key="volumes/raw",
 93        label_paths=data_path,
 94        label_key="volumes/labels/neuron_ids",
 95        patch_shape=patch_shape,
 96        **kwargs
 97    )
 98
 99
100def get_snemi_loader(
101    path: Union[os.PathLike, str],
102    patch_shape: Tuple[int, int, int],
103    batch_size: int,
104    sample: str = "train",
105    download: bool = False,
106    offsets: Optional[List[List[int]]] = None,
107    boundaries: bool = False,
108    **kwargs,
109) -> DataLoader:
110    """Get the DataLoader for EM neuron segmentation in the SNEMI dataset.
111
112    Args:
113        path: Filepath to a folder where the downloaded data will be saved.
114        patch_shape: The patch shape to use for training.
115        batch_size: The batch size for training.
116        sample: The sample to download, either 'train' or 'test'.
117        download: Whether to download the data if it is not present.
118        offsets: Offset values for affinity computation used as target.
119        boundaries: Whether to compute boundaries as the target.
120        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
121
122    Returns:
123        The DataLoader.
124    """
125    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
126    ds = get_snemi_dataset(
127        path=path,
128        patch_shape=patch_shape,
129        sample=sample,
130        download=download,
131        offsets=offsets,
132        boundaries=boundaries,
133        **ds_kwargs,
134    )
135    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
SNEMI_URLS = {'train': 'https://oc.embl.de/index.php/s/43iMotlXPyAB39z/download', 'test': 'https://oc.embl.de/index.php/s/aRhphk35H23De2s/download'}
CHECKSUMS = {'train': '5b130a24d9eb23d972fede0f1a403bc05f6808b361cfa22eff23b930b12f0615', 'test': '3df3920a0ddec6897105845f842b2665d37a47c2d1b96d4f4565682e315a59fa'}
def get_snemi_data(path: Union[os.PathLike, str], sample: str, download: bool = False):

Download the SNEMI data for the given sample.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • sample: The sample to download, either 'train' or 'test'.
  • download: Whether to download the data if it is not present.
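
A minimal usage sketch; the target folder ./training_data is just an example location, and with download=False the call expects the file to be present already:

from torch_em.data.datasets.electron_microscopy.snemi import get_snemi_data

# Download the training sample into a local folder (example path).
# The volume is stored as ./training_data/snemi_train.h5.
get_snemi_data("./training_data", sample="train", download=True)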
def get_snemi_paths(path: Union[os.PathLike, str], sample: str, download: bool = False) -> str:

Get path to the SNEMI data.

Arguments:
  • path: Filepath to a folder where the downloaded data is saved.
  • sample: The sample to download, either 'train' or 'test'.
  • download: Whether to download the data if it is not present.
Returns:
  The filepath for the stored data.
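
A short sketch of how the returned path could be inspected. Reading the file with h5py is an assumption based on the .h5 extension; the keys 'volumes/raw' and 'volumes/labels/neuron_ids' are the ones referenced in the module source above:

import h5py

from torch_em.data.datasets.electron_microscopy.snemi import get_snemi_paths

# Resolve (and, if needed, download) the training volume.
data_path = get_snemi_paths("./training_data", sample="train", download=True)

# Inspect the raw EM data and the neuron instance labels.
with h5py.File(data_path, "r") as f:
    print(f["volumes/raw"].shape)
    print(f["volumes/labels/neuron_ids"].shape)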

def get_snemi_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], sample: str = 'train', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:

Get the SNEMI dataset for the segmentation of neurons in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • sample: The sample to download, either 'train' or 'test'.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:
  The segmentation dataset.
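
A hedged example of constructing the dataset; the patch shape and the choice of boundary targets are illustrative values, not recommendations from the package:

from torch_em.data.datasets.electron_microscopy.snemi import get_snemi_dataset

# 3D patches of 32 x 256 x 256 voxels; boundaries=True derives boundary maps
# from the instance labels and uses them as the training target.
dataset = get_snemi_dataset(
    path="./training_data",
    patch_shape=(32, 256, 256),
    sample="train",
    download=True,
    boundaries=True,
)

The resulting dataset can be wrapped into a loader with torch_em.get_data_loader, or obtained directly via get_snemi_loader below.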

def get_snemi_loader(path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, sample: str = 'train', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:

Get the DataLoader for EM neuron segmentation in the SNEMI dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • sample: The sample to download, either 'train' or 'test'.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
  The DataLoader.
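
A usage sketch for a typical training setup; batch size, patch shape, and num_workers (forwarded to the PyTorch DataLoader) are example values, and the (raw, target) batch layout follows the usual torch_em convention:

from torch_em.data.datasets.electron_microscopy.snemi import get_snemi_loader

# Loader over the training volume with boundary targets.
loader = get_snemi_loader(
    path="./training_data",
    patch_shape=(32, 256, 256),
    batch_size=1,
    sample="train",
    download=True,
    boundaries=True,
    num_workers=2,  # passed through to the PyTorch DataLoader
)

# Each batch is a pair of raw EM patches and the corresponding targets.
for raw, target in loader:
    print(raw.shape, target.shape)
    break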