torch_em.data.datasets.electron_microscopy.sponge_em
This dataset contains volume EM data of a sponge chamber with segmentation annotations for cells, cilia and microvilli.
It contains three annotated volumes. The dataset is part of the publication https://doi.org/10.1126/science.abj2949. Please cite this publication if you use the dataset in your research.
1"""This dataset contains volume EM data of a sponge chamber with 2segmentation annotations for cells, cilia and microvilli. 3 4It contains three annotated volumes. The dataset is part of the publication 5https://doi.org/10.1126/science.abj2949. Please cite this publication of you use the 6dataset in your research. 7""" 8 9import os 10from glob import glob 11from typing import Optional, Sequence, Tuple, Union, List 12 13from torch.utils.data import Dataset, DataLoader 14 15import torch_em 16 17from .. import util 18 19 20URL = "https://zenodo.org/record/8150818/files/sponge_em_train_data.zip?download=1" 21CHECKSUM = "f1df616cd60f81b91d7642933e9edd74dc6c486b2e546186a7c1e54c67dd32a5" 22 23 24def get_sponge_em_data(path: Union[os.PathLike, str], download: bool) -> Tuple[str, int]: 25 """Download the SpongeEM training data. 26 27 Args: 28 path: Filepath to a folder where the downloaded data will be saved. 29 download: Whether to download the data if it is not present. 30 31 Returns: 32 The path to the downloaded data. 33 The number of downloaded volumes. 34 """ 35 n_files = len(glob(os.path.join(path, "*.h5"))) 36 if n_files == 3: 37 return path, n_files 38 elif n_files == 0: 39 pass 40 else: 41 raise RuntimeError( 42 f"Invalid number of downloaded files in {path}. Please remove this folder and rerun this function." 43 ) 44 45 os.makedirs(path, exist_ok=True) 46 zip_path = os.path.join(path, "data.zip") 47 util.download_source(zip_path, URL, download, CHECKSUM) 48 util.unzip(zip_path, path) 49 50 n_files = len(glob(os.path.join(path, "*.h5"))) 51 assert n_files == 3 52 return path, n_files 53 54 55def get_sponge_em_paths( 56 path: Union[os.PathLike, str], sample_ids: Optional[Sequence[int]], download: bool = False 57) -> List[str]: 58 """Get paths to the SpongeEM data. 59 60 Args: 61 path: Filepath to a folder where the downloaded data will saved. 62 sample_ids: The sample to download, valid ids are 1, 2 and 3. 63 download: Whether to download the data if it is not present. 64 65 Returns: 66 The filepaths to the stored data. 67 """ 68 data_folder, n_files = get_sponge_em_data(path, download) 69 70 if sample_ids is None: 71 sample_ids = range(1, n_files + 1) 72 73 paths = [os.path.join(data_folder, f"train_data_0{i}.h5") for i in sample_ids] 74 return paths 75 76 77def get_sponge_em_dataset( 78 path: Union[os.PathLike, str], 79 mode: str, 80 patch_shape: Tuple[int, int, int], 81 sample_ids: Optional[Sequence[int]] = None, 82 download: bool = False, 83 **kwargs 84) -> Dataset: 85 """Get the SpongeEM dataset for the segmentation of structures in EM. 86 87 Args: 88 path: Filepath to a folder where the downloaded data will be saved. 89 mode: Choose the segmentation task, either 'semantic' or 'instances'. 90 patch_shape: The patch shape to use for training. 91 sample_ids: The sample to download, valid ids are 1, 2 and 3. 92 download: Whether to download the data if it is not present. 93 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 94 95 Returns: 96 The segmentation dataset. 
97 """ 98 assert mode in ("semantic", "instances") 99 100 paths = get_sponge_em_paths(path, sample_ids, download) 101 102 return torch_em.default_segmentation_dataset( 103 raw_paths=paths, 104 raw_key="volumes/raw", 105 label_paths=paths, 106 label_key=f"volumes/labels/{mode}", 107 patch_shape=patch_shape, 108 **kwargs 109 ) 110 111 112def get_sponge_em_loader( 113 path: Union[os.PathLike, str], 114 mode: str, 115 patch_shape: Tuple[int, int, int], 116 batch_size: int, 117 sample_ids: Optional[Sequence[int]] = None, 118 download: bool = False, 119 **kwargs 120) -> DataLoader: 121 """Get the SpongeEM dataloader for the segmentation of structures in EM. 122 123 Args: 124 path: Filepath to a folder where the downloaded data will be saved. 125 mode: Choose the segmentation task, either 'semantic' or 'instances'. 126 patch_shape: The patch shape to use for training. 127 batch_size: The batch size for training. 128 sample_ids: The sample to download, valid ids are 1, 2 and 3. 129 download: Whether to download the data if it is not present. 130 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 131 132 Returns: 133 The DataLoader. 134 """ 135 ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) 136 ds = get_sponge_em_dataset(path, mode, patch_shape, sample_ids=sample_ids, download=download, **ds_kwargs) 137 return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
URL = 'https://zenodo.org/record/8150818/files/sponge_em_train_data.zip?download=1'
CHECKSUM = 'f1df616cd60f81b91d7642933e9edd74dc6c486b2e546186a7c1e54c67dd32a5'
def get_sponge_em_data(path: Union[os.PathLike, str], download: bool) -> Tuple[str, int]:
def get_sponge_em_data(path: Union[os.PathLike, str], download: bool) -> Tuple[str, int]:
    """Download the SpongeEM training data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.
        The number of downloaded volumes.
    """
    n_files = len(glob(os.path.join(path, "*.h5")))
    if n_files == 3:
        return path, n_files
    elif n_files == 0:
        pass
    else:
        raise RuntimeError(
            f"Invalid number of downloaded files in {path}. Please remove this folder and rerun this function."
        )

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "data.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    util.unzip(zip_path, path)

    n_files = len(glob(os.path.join(path, "*.h5")))
    assert n_files == 3
    return path, n_files
Download the SpongeEM training data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The path to the downloaded data. The number of downloaded volumes.
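A minimal usage sketch; the folder "./data/sponge_em" below is an illustrative choice, not part of the API:

from torch_em.data.datasets.electron_microscopy.sponge_em import get_sponge_em_data

# Download the SpongeEM training data if it is not present yet and
# check how many annotated volumes are available locally.
data_folder, n_volumes = get_sponge_em_data("./data/sponge_em", download=True)
print(data_folder, n_volumes)  # three .h5 volumes are expected after download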
def get_sponge_em_paths(path: Union[os.PathLike, str], sample_ids: Optional[Sequence[int]], download: bool = False) -> List[str]:
def get_sponge_em_paths(
    path: Union[os.PathLike, str], sample_ids: Optional[Sequence[int]], download: bool = False
) -> List[str]:
    """Get paths to the SpongeEM data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        sample_ids: The sample ids to download, valid ids are 1, 2 and 3.
        download: Whether to download the data if it is not present.

    Returns:
        The filepaths to the stored data.
    """
    data_folder, n_files = get_sponge_em_data(path, download)

    if sample_ids is None:
        sample_ids = range(1, n_files + 1)

    paths = [os.path.join(data_folder, f"train_data_0{i}.h5") for i in sample_ids]
    return paths
Get paths to the SpongeEM data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- sample_ids: The sample ids to download, valid ids are 1, 2 and 3.
- download: Whether to download the data if it is not present.
Returns:
The filepaths to the stored data.
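The returned paths point to HDF5 files whose raw data and labels are stored under the keys used by get_sponge_em_dataset below ("volumes/raw" and "volumes/labels/<mode>"). A sketch of fetching a subset of the volumes and inspecting one of them, assuming h5py is installed; the folder name is illustrative:

import h5py
from torch_em.data.datasets.electron_microscopy.sponge_em import get_sponge_em_paths

# Get the file paths for the first two annotated volumes only.
paths = get_sponge_em_paths("./data/sponge_em", sample_ids=[1, 2], download=True)

with h5py.File(paths[0], "r") as f:
    print(f["volumes/raw"].shape)
    print(f["volumes/labels/instances"].shape)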
def get_sponge_em_dataset(
    path: Union[os.PathLike, str],
    mode: str,
    patch_shape: Tuple[int, int, int],
    sample_ids: Optional[Sequence[int]] = None,
    download: bool = False,
    **kwargs
) -> torch.utils.data.dataset.Dataset:
def get_sponge_em_dataset(
    path: Union[os.PathLike, str],
    mode: str,
    patch_shape: Tuple[int, int, int],
    sample_ids: Optional[Sequence[int]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the SpongeEM dataset for the segmentation of structures in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        mode: Choose the segmentation task, either 'semantic' or 'instances'.
        patch_shape: The patch shape to use for training.
        sample_ids: The sample ids to download, valid ids are 1, 2 and 3.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert mode in ("semantic", "instances")

    paths = get_sponge_em_paths(path, sample_ids, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=paths,
        raw_key="volumes/raw",
        label_paths=paths,
        label_key=f"volumes/labels/{mode}",
        patch_shape=patch_shape,
        **kwargs
    )
Get the SpongeEM dataset for the segmentation of structures in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- mode: Choose the segmentation task, either 'semantic' or 'instances'.
- patch_shape: The patch shape to use for training.
- sample_ids: The sample ids to download, valid ids are 1, 2 and 3.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
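A sketch of constructing the dataset; the folder name is illustrative and the patch shape is an arbitrary example that should be chosen to fit your model and memory budget:

from torch_em.data.datasets.electron_microscopy.sponge_em import get_sponge_em_dataset

ds = get_sponge_em_dataset(
    path="./data/sponge_em",     # illustrative download folder
    mode="instances",            # or "semantic"
    patch_shape=(32, 256, 256),  # example 3D patch shape (z, y, x)
    download=True,
)

Any further keyword arguments are forwarded to torch_em.default_segmentation_dataset.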
def get_sponge_em_loader(
    path: Union[os.PathLike, str],
    mode: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample_ids: Optional[Sequence[int]] = None,
    download: bool = False,
    **kwargs
) -> torch.utils.data.dataloader.DataLoader:
def get_sponge_em_loader(
    path: Union[os.PathLike, str],
    mode: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample_ids: Optional[Sequence[int]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the SpongeEM dataloader for the segmentation of structures in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        mode: Choose the segmentation task, either 'semantic' or 'instances'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_ids: The sample ids to download, valid ids are 1, 2 and 3.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_sponge_em_dataset(path, mode, patch_shape, sample_ids=sample_ids, download=download, **ds_kwargs)
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
Get the SpongeEM dataloader for the segmentation of structures in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- mode: Choose the segmentation task, either 'semantic' or 'instances'.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- sample_ids: The sample ids to download, valid ids are 1, 2 and 3.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.
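A sketch of a typical setup that splits the three volumes into training and validation loaders via sample_ids; the folder name, patch shape and batch size are illustrative choices:

from torch_em.data.datasets.electron_microscopy.sponge_em import get_sponge_em_loader

train_loader = get_sponge_em_loader(
    path="./data/sponge_em", mode="instances", patch_shape=(32, 256, 256),
    batch_size=1, sample_ids=[1, 2], download=True,
)
val_loader = get_sponge_em_loader(
    path="./data/sponge_em", mode="instances", patch_shape=(32, 256, 256),
    batch_size=1, sample_ids=[3], download=True,
)

# Each batch is expected to yield a (raw, label) pair of tensors.
x, y = next(iter(train_loader))
print(x.shape, y.shape)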