torch_em.data.datasets.electron_microscopy.snemi
SNEMI is a dataset for neuron segmentation in EM.
It contains annotated volumes from the mouse brain. The data is part of the publication https://doi.org/10.1016/j.cell.2015.06.054. Please cite it if you use this dataset for a publication.
"""SNEMI is a dataset for neuron segmentation in EM.

It contains annotated volumes from the mouse brain.
The data is part of the publication https://doi.org/10.1016/j.cell.2015.06.054.
Please cite it if you use this dataset for a publication.
"""

import os
from typing import List, Optional, Union, Tuple

import torch_em
from torch.utils.data import Dataset, DataLoader
from .. import util

# Download location of the volume for each available sample.
SNEMI_URLS = {
    "train": "https://oc.embl.de/index.php/s/43iMotlXPyAB39z/download",
    "test": "https://oc.embl.de/index.php/s/aRhphk35H23De2s/download"
}
# Checksum passed to `util.download_source` together with the matching URL.
CHECKSUMS = {
    "train": "5b130a24d9eb23d972fede0f1a403bc05f6808b361cfa22eff23b930b12f0615",
    "test": "3df3920a0ddec6897105845f842b2665d37a47c2d1b96d4f4565682e315a59fa"
}


def get_snemi_data(path: Union[os.PathLike, str], sample: str, download: bool) -> str:
    """Fetch the SNEMI volume of one sample and return where it is stored.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
            The folder is created if it does not exist yet.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.
    """
    os.makedirs(path, exist_ok=True)

    filename = f"snemi_{sample}.h5"
    volume_path = os.path.join(path, filename)

    # URL and checksum are looked up by sample name; an unknown name raises a KeyError here.
    url = SNEMI_URLS[sample]
    checksum = CHECKSUMS[sample]
    util.download_source(volume_path, url, download, checksum)

    # The file has to exist after this point, whether it was just downloaded or already there.
    assert os.path.exists(volume_path), volume_path
    return volume_path


def get_snemi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    sample: str = "train",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> Dataset:
    """Build the SNEMI dataset for segmenting neurons in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training. Must have three entries.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert len(patch_shape) == 3
    volume_path = get_snemi_data(path, sample, download)

    dataset_kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)
    dataset_kwargs, _ = util.add_instance_label_transform(
        dataset_kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )

    # Raw data and labels are read from the same file, under different internal keys.
    return torch_em.default_segmentation_dataset(
        volume_path, "volumes/raw", volume_path, "volumes/labels/neuron_ids", patch_shape, **dataset_kwargs
    )


def get_snemi_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample: str = "train",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the DataLoader for EM neuron segmentation in the SNEMI dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_snemi_dataset(
        path=path, patch_shape=patch_shape, sample=sample, download=download,
        offsets=offsets, boundaries=boundaries, **dataset_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **loader_kwargs)
SNEMI_URLS =
{'train': 'https://oc.embl.de/index.php/s/43iMotlXPyAB39z/download', 'test': 'https://oc.embl.de/index.php/s/aRhphk35H23De2s/download'}
CHECKSUMS =
{'train': '5b130a24d9eb23d972fede0f1a403bc05f6808b361cfa22eff23b930b12f0615', 'test': '3df3920a0ddec6897105845f842b2665d37a47c2d1b96d4f4565682e315a59fa'}
def
get_snemi_data(path: Union[os.PathLike, str], sample: str, download: bool) -> str:
def get_snemi_data(path: Union[os.PathLike, str], sample: str, download: bool) -> str:
    """Fetch the SNEMI volume of one sample and return where it is stored.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
            The folder is created if it does not exist yet.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.
    """
    os.makedirs(path, exist_ok=True)

    filename = f"snemi_{sample}.h5"
    volume_path = os.path.join(path, filename)

    # URL and checksum are looked up by sample name; an unknown name raises a KeyError here.
    url = SNEMI_URLS[sample]
    checksum = CHECKSUMS[sample]
    util.download_source(volume_path, url, download, checksum)

    # The file has to exist after this point, whether it was just downloaded or already there.
    assert os.path.exists(volume_path), volume_path
    return volume_path
Download the SNEMI data for the requested sample.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- sample: The sample to download, either 'train' or 'test'.
- download: Whether to download the data if it is not present.
Returns:
The path to the downloaded data.
def
get_snemi_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], sample: str = 'train', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_snemi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    sample: str = "train",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> Dataset:
    """Build the SNEMI dataset for segmenting neurons in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training. Must have three entries.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert len(patch_shape) == 3
    volume_path = get_snemi_data(path, sample, download)

    dataset_kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)
    dataset_kwargs, _ = util.add_instance_label_transform(
        dataset_kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )

    # Raw data and labels are read from the same file, under different internal keys.
    return torch_em.default_segmentation_dataset(
        volume_path, "volumes/raw", volume_path, "volumes/labels/neuron_ids", patch_shape, **dataset_kwargs
    )
Get the SNEMI dataset for the segmentation of neurons in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- sample: The sample to download, either 'train' or 'test'.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_snemi_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, sample: str = 'train', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_snemi_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample: str = "train",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the DataLoader for EM neuron segmentation in the SNEMI dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_snemi_dataset(
        path=path, patch_shape=patch_shape, sample=sample, download=download,
        offsets=offsets, boundaries=boundaries, **dataset_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **loader_kwargs)
Get the DataLoader for EM neuron segmentation in the SNEMI dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- sample: The sample to download, either 'train' or 'test'.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.