torch_em.data.datasets.electron_microscopy.snemi

SNEMI is a dataset for neuron segmentation in EM.

It contains annotated volumes from the mouse brain. The data is part of the publication https://doi.org/10.1016/j.cell.2015.06.054. Please cite it if you use this dataset for a publication.

  1"""SNEMI is a dataset for neuron segmentation in EM.
  2
  3It contains annotated volumes from the mouse brain.
  4The data is part of the publication https://doi.org/10.1016/j.cell.2015.06.054.
  5Please cite it if you use this dataset for a publication.
  6"""
  7
  8import os
  9from typing import List, Optional, Union, Tuple
 10
 11import torch_em
 12from torch.utils.data import Dataset, DataLoader
 13from .. import util
 14
 15SNEMI_URLS = {
 16    "train": "https://oc.embl.de/index.php/s/43iMotlXPyAB39z/download",
 17    "test": "https://oc.embl.de/index.php/s/aRhphk35H23De2s/download"
 18}
 19CHECKSUMS = {
 20    "train": "5b130a24d9eb23d972fede0f1a403bc05f6808b361cfa22eff23b930b12f0615",
 21    "test": "3df3920a0ddec6897105845f842b2665d37a47c2d1b96d4f4565682e315a59fa"
 22}
 23
 24
 25def get_snemi_data(path: Union[os.PathLike, str], sample: str, download: bool) -> str:
 26    """Download the SNEMI training data.
 27
 28    Args:
 29        path: Filepath to a folder where the downloaded data will be saved.
 30        sample: The sample to download, either 'train' or 'test'.
 31        download: Whether to download the data if it is not present.
 32
 33    Returns:
 34        The path to the downloaded data.
 35    """
 36    os.makedirs(path, exist_ok=True)
 37    data_path = os.path.join(path, f"snemi_{sample}.h5")
 38    util.download_source(data_path, SNEMI_URLS[sample], download, CHECKSUMS[sample])
 39    assert os.path.exists(data_path), data_path
 40    return data_path
 41
 42
 43def get_snemi_dataset(
 44    path: Union[os.PathLike, str],
 45    patch_shape: Tuple[int, int, int],
 46    sample: str = "train",
 47    download: bool = False,
 48    offsets: Optional[List[List[int]]] = None,
 49    boundaries: bool = False,
 50    **kwargs,
 51) -> Dataset:
 52    """Get the SNEMI dataset for the segmentation of neurons in EM.
 53
 54    Args:
 55        path: Filepath to a folder where the downloaded data will be saved.
 56        patch_shape: The patch shape to use for training.
 57        sample: The sample to download, either 'train' or 'test'.
 58        download: Whether to download the data if it is not present.
 59        offsets: Offset values for affinity computation used as target.
 60        boundaries: Whether to compute boundaries as the target.
 61        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 62
 63    Returns:
 64       The segmentation dataset.
 65    """
 66    assert len(patch_shape) == 3
 67    data_path = get_snemi_data(path, sample, download)
 68
 69    kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)
 70    kwargs, _ = util.add_instance_label_transform(
 71        kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
 72    )
 73
 74    raw_key = "volumes/raw"
 75    label_key = "volumes/labels/neuron_ids"
 76    return torch_em.default_segmentation_dataset(data_path, raw_key, data_path, label_key, patch_shape, **kwargs)
 77
 78
 79def get_snemi_loader(
 80    path: Union[os.PathLike, str],
 81    patch_shape: Tuple[int, int, int],
 82    batch_size: int,
 83    sample: str = "train",
 84    download: bool = False,
 85    offsets: Optional[List[List[int]]] = None,
 86    boundaries: bool = False,
 87    **kwargs,
 88) -> DataLoader:
 89    """Get the DataLoader for EM neuron segmentation in the SNEMI dataset.
 90
 91    Args:
 92        path: Filepath to a folder where the downloaded data will be saved.
 93        patch_shape: The patch shape to use for training.
 94        batch_size: The batch size for training.
 95        sample: The sample to download, either 'train' or 'test'.
 96        download: Whether to download the data if it is not present.
 97        offsets: Offset values for affinity computation used as target.
 98        boundaries: Whether to compute boundaries as the target.
 99        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
100
101    Returns:
102        The DataLoader.
103    """
104    ds_kwargs, loader_kwargs = util.split_kwargs(
105        torch_em.default_segmentation_dataset, **kwargs
106    )
107    ds = get_snemi_dataset(
108        path=path,
109        patch_shape=patch_shape,
110        sample=sample,
111        download=download,
112        offsets=offsets,
113        boundaries=boundaries,
114        **ds_kwargs,
115    )
116    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
SNEMI_URLS = {'train': 'https://oc.embl.de/index.php/s/43iMotlXPyAB39z/download', 'test': 'https://oc.embl.de/index.php/s/aRhphk35H23De2s/download'}
CHECKSUMS = {'train': '5b130a24d9eb23d972fede0f1a403bc05f6808b361cfa22eff23b930b12f0615', 'test': '3df3920a0ddec6897105845f842b2665d37a47c2d1b96d4f4565682e315a59fa'}
def get_snemi_data(path: Union[os.PathLike, str], sample: str, download: bool) -> str:
def get_snemi_data(path: Union[os.PathLike, str], sample: str, download: bool) -> str:
    """Download the SNEMI data for the requested sample.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.

    Raises:
        ValueError: If `sample` is not one of the samples listed in `SNEMI_URLS`.
    """
    # Validate before touching the file system, so that an unknown sample name
    # neither leaves an empty folder behind nor surfaces as a bare KeyError.
    if sample not in SNEMI_URLS:
        raise ValueError(f"Invalid sample '{sample}', expected one of {list(SNEMI_URLS)}.")

    os.makedirs(path, exist_ok=True)
    data_path = os.path.join(path, f"snemi_{sample}.h5")
    util.download_source(data_path, SNEMI_URLS[sample], download, CHECKSUMS[sample])
    # Sanity check that the file is in place before handing out its path.
    assert os.path.exists(data_path), data_path
    return data_path

Download the SNEMI data for the requested sample ('train' or 'test').

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • sample: The sample to download, either 'train' or 'test'.
  • download: Whether to download the data if it is not present.
Returns:

The path to the downloaded data.

def get_snemi_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], sample: str = 'train', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_snemi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    sample: str = "train",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> Dataset:
    """Build the SNEMI segmentation dataset for neurons in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training. Must have three entries.
        sample: The sample to use, either 'train' or 'test'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert len(patch_shape) == 3
    volume_path = get_snemi_data(path, sample, download)

    # Mark the data as a segmentation dataset, then let the helper add the label
    # transform selected through `boundaries` / `offsets`.
    dataset_kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)
    dataset_kwargs, _ = util.add_instance_label_transform(
        dataset_kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )

    # Raw data and labels are read from the same file, under different keys.
    return torch_em.default_segmentation_dataset(
        volume_path,
        "volumes/raw",
        volume_path,
        "volumes/labels/neuron_ids",
        patch_shape,
        **dataset_kwargs,
    )

Get the SNEMI dataset for the segmentation of neurons in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • sample: The sample to download, either 'train' or 'test'.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_snemi_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, sample: str = 'train', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
 80def get_snemi_loader(
 81    path: Union[os.PathLike, str],
 82    patch_shape: Tuple[int, int, int],
 83    batch_size: int,
 84    sample: str = "train",
 85    download: bool = False,
 86    offsets: Optional[List[List[int]]] = None,
 87    boundaries: bool = False,
 88    **kwargs,
 89) -> DataLoader:
 90    """Get the DataLoader for EM neuron segmentation in the SNEMI dataset.
 91
 92    Args:
 93        path: Filepath to a folder where the downloaded data will be saved.
 94        patch_shape: The patch shape to use for training.
 95        batch_size: The batch size for training.
 96        sample: The sample to download, either 'train' or 'test'.
 97        download: Whether to download the data if it is not present.
 98        offsets: Offset values for affinity computation used as target.
 99        boundaries: Whether to compute boundaries as the target.
100        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
101
102    Returns:
103        The DataLoader.
104    """
105    ds_kwargs, loader_kwargs = util.split_kwargs(
106        torch_em.default_segmentation_dataset, **kwargs
107    )
108    ds = get_snemi_dataset(
109        path=path,
110        patch_shape=patch_shape,
111        sample=sample,
112        download=download,
113        offsets=offsets,
114        boundaries=boundaries,
115        **ds_kwargs,
116    )
117    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)

Get the DataLoader for EM neuron segmentation in the SNEMI dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • sample: The sample to download, either 'train' or 'test'.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.