torch_em.data.datasets.electron_microscopy.snemi
SNEMI is a dataset for neuron segmentation in EM. It contains annotated volumes from the mouse brain.
The data is part of the publication https://doi.org/10.1016/j.cell.2015.06.054. Please cite it if you use this dataset for a publication.
"""SNEMI is a dataset for neuron segmentation in EM.
It contains annotated volumes from the mouse brain.

The data is part of the publication https://doi.org/10.1016/j.cell.2015.06.054.
Please cite it if you use this dataset for a publication.
"""

import os
from typing import List, Optional, Union, Tuple

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


# Download location and SHA256 checksum of the HDF5 file for each available sample.
SNEMI_URLS = {
    "train": "https://oc.embl.de/index.php/s/43iMotlXPyAB39z/download",
    "test": "https://oc.embl.de/index.php/s/aRhphk35H23De2s/download"
}
CHECKSUMS = {
    "train": "5b130a24d9eb23d972fede0f1a403bc05f6808b361cfa22eff23b930b12f0615",
    "test": "3df3920a0ddec6897105845f842b2665d37a47c2d1b96d4f4565682e315a59fa"
}


def get_snemi_data(path: Union[os.PathLike, str], sample: str, download: bool = False):
    """Download the SNEMI data for the given sample.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.
    """
    os.makedirs(path, exist_ok=True)
    data_path = os.path.join(path, f"snemi_{sample}.h5")
    util.download_source(data_path, SNEMI_URLS[sample], download, CHECKSUMS[sample])


def get_snemi_paths(path: Union[os.PathLike, str], sample: str, download: bool = False) -> str:
    """Get path to the SNEMI data.

    Args:
        path: Filepath to a folder where the downloaded data is saved.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath for the stored data.
    """
    get_snemi_data(path, sample, download)
    data_path = os.path.join(path, f"snemi_{sample}.h5")
    # Internal sanity check: the file has to be on disk after the step above.
    assert os.path.exists(data_path), data_path
    return data_path


def get_snemi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    sample: str = "train",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> Dataset:
    """Get the SNEMI dataset for the segmentation of neurons in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training. Must have exactly three entries.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        ValueError: If `patch_shape` does not have exactly three entries.
    """
    # Explicit exception instead of `assert`: assertions are removed under `python -O`,
    # which would let an invalid patch shape slip through to the dataset construction.
    if len(patch_shape) != 3:
        raise ValueError(f"Expected a patch_shape with 3 entries, got {patch_shape!r}.")

    data_path = get_snemi_paths(path, sample, download)

    kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)
    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )

    # Raw data and labels are read from the same HDF5 file under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=data_path,
        raw_key="volumes/raw",
        label_paths=data_path,
        label_key="volumes/labels/neuron_ids",
        patch_shape=patch_shape,
        **kwargs
    )


def get_snemi_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample: str = "train",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the DataLoader for EM neuron segmentation in the SNEMI dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_snemi_dataset(
        path=path,
        patch_shape=patch_shape,
        sample=sample,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        **ds_kwargs,
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
SNEMI_URLS =
{'train': 'https://oc.embl.de/index.php/s/43iMotlXPyAB39z/download', 'test': 'https://oc.embl.de/index.php/s/aRhphk35H23De2s/download'}
CHECKSUMS =
{'train': '5b130a24d9eb23d972fede0f1a403bc05f6808b361cfa22eff23b930b12f0615', 'test': '3df3920a0ddec6897105845f842b2665d37a47c2d1b96d4f4565682e315a59fa'}
def
get_snemi_data(path: Union[os.PathLike, str], sample: str, download: bool = False):
def get_snemi_data(path: Union[os.PathLike, str], sample: str, download: bool = False):
    """Make sure the SNEMI file for the requested sample is handled by the download helper.

    The target folder is created if it does not exist yet. The file `snemi_<sample>.h5`
    inside that folder is then passed to `util.download_source` together with the URL
    and the checksum registered for the sample.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.
    """
    os.makedirs(path, exist_ok=True)

    target_file = os.path.join(path, f"snemi_{sample}.h5")
    # Look up the per-sample source and its expected checksum.
    url = SNEMI_URLS[sample]
    checksum = CHECKSUMS[sample]

    util.download_source(target_file, url, download, checksum)
Download the SNEMI data for the given sample.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- sample: The sample to download, either 'train' or 'test'.
- download: Whether to download the data if it is not present.
def
get_snemi_paths( path: Union[os.PathLike, str], sample: str, download: bool = False) -> str:
def get_snemi_paths(path: Union[os.PathLike, str], sample: str, download: bool = False) -> str:
    """Return the location of the SNEMI file that belongs to a sample.

    `get_snemi_data` is called first, then the expected file `snemi_<sample>.h5`
    inside `path` is checked for existence.

    Args:
        path: Filepath to a folder where the downloaded data is saved.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath for the stored data.
    """
    # Folder creation and the download handling are delegated.
    get_snemi_data(path, sample, download)

    volume_file = os.path.join(path, f"snemi_{sample}.h5")
    # Sanity check: the file is expected to be on disk at this point.
    assert os.path.exists(volume_file), volume_file

    return volume_file
Get path to the SNEMI data.
Arguments:
- path: Filepath to a folder where the downloaded data is saved.
- sample: The sample to download, either 'train' or 'test'.
- download: Whether to download the data if it is not present.
Returns:
The filepath for the stored data.
def
get_snemi_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], sample: str = 'train', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_snemi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    sample: str = "train",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> Dataset:
    """Get the SNEMI dataset for the segmentation of neurons in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training. Must have exactly three entries.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        ValueError: If `patch_shape` does not have exactly three entries.
    """
    # Explicit exception instead of `assert`: assertions are removed under `python -O`,
    # which would let an invalid patch shape slip through to the dataset construction.
    if len(patch_shape) != 3:
        raise ValueError(f"Expected a patch_shape with 3 entries, got {patch_shape!r}.")

    data_path = get_snemi_paths(path, sample, download)

    kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)
    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )

    # Raw data and labels are read from the same HDF5 file under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=data_path,
        raw_key="volumes/raw",
        label_paths=data_path,
        label_key="volumes/labels/neuron_ids",
        patch_shape=patch_shape,
        **kwargs
    )
Get the SNEMI dataset for the segmentation of neurons in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- sample: The sample to download, either 'train' or 'test'.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_snemi_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, sample: str = 'train', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_snemi_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample: str = "train",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> DataLoader:
    """Build a DataLoader on top of the SNEMI dataset for EM neuron segmentation.

    The extra keyword arguments are split with `util.split_kwargs`: one part is passed
    on to `get_snemi_dataset`, the other part to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample: The sample to download, either 'train' or 'test'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )

    dataset = get_snemi_dataset(
        path=path, patch_shape=patch_shape, sample=sample, download=download,
        offsets=offsets, boundaries=boundaries, **dataset_kwargs,
    )

    loader = torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader
Get the DataLoader for EM neuron segmentation in the SNEMI dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- sample: The sample to download, either 'train' or 'test'.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.