torch_em.data.datasets.electron_microscopy.sponge_em
This dataset contains volume EM data of a sponge chamber with segmentation annotations for cells, cilia and microvilli.
It contains three annotated volumes. The dataset is part of the publication https://doi.org/10.1126/science.abj2949. Please cite this publication if you use the dataset in your research.
"""This dataset contains volume EM data of a sponge chamber with
segmentation annotations for cells, cilia and microvilli.

It contains three annotated volumes. The dataset is part of the publication
https://doi.org/10.1126/science.abj2949. Please cite this publication if you use the
dataset in your research.
"""

import os
from glob import glob
from typing import Optional, Sequence, Tuple, Union

import torch_em
from torch.utils.data import Dataset, DataLoader

from .. import util

URL = "https://zenodo.org/record/8150818/files/sponge_em_train_data.zip?download=1"
CHECKSUM = "f1df616cd60f81b91d7642933e9edd74dc6c486b2e546186a7c1e54c67dd32a5"


def get_sponge_em_data(path: Union[os.PathLike, str], download: bool) -> Tuple[str, int]:
    """Download the SpongeEM training data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.
        The number of downloaded volumes.
    """
    n_files = len(glob(os.path.join(path, "*.h5")))
    if n_files == 3:  # All three volumes are already present.
        return path, n_files
    elif n_files == 0:  # Nothing downloaded yet: proceed with the download below.
        pass
    else:  # Partial / corrupted download state: ask the user to start over.
        raise RuntimeError(
            f"Invalid number of downloaded files in {path}. Please remove this folder and rerun this function."
        )

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "data.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    util.unzip(zip_path, path)

    n_files = len(glob(os.path.join(path, "*.h5")))
    assert n_files == 3
    return path, n_files


def get_sponge_em_dataset(
    path: Union[os.PathLike, str],
    mode: str,
    patch_shape: Tuple[int, int, int],
    sample_ids: Optional[Sequence[int]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the SpongeEM dataset for the segmentation of structures in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        mode: Choose the segmentation task, either 'semantic' or 'instances'.
        patch_shape: The patch shape to use for training.
        sample_ids: The samples to load, valid ids are 1, 2 and 3.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert mode in ("semantic", "instances")
    data_folder, n_files = get_sponge_em_data(path, download)

    # By default load all three annotated volumes.
    if sample_ids is None:
        sample_ids = range(1, n_files + 1)
    paths = [os.path.join(data_folder, f"train_data_0{i}.h5") for i in sample_ids]

    raw_key = "volumes/raw"
    label_key = f"volumes/labels/{mode}"
    return torch_em.default_segmentation_dataset(paths, raw_key, paths, label_key, patch_shape, **kwargs)


def get_sponge_em_loader(
    path: Union[os.PathLike, str],
    mode: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample_ids: Optional[Sequence[int]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the SpongeEM dataloader for the segmentation of structures in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        mode: Choose the segmentation task, either 'semantic' or 'instances'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_ids: The samples to load, valid ids are 1, 2 and 3.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Route dataset-construction kwargs to the dataset and the rest to the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_sponge_em_dataset(path, mode, patch_shape, sample_ids=sample_ids, download=download, **ds_kwargs)
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
URL =
'https://zenodo.org/record/8150818/files/sponge_em_train_data.zip?download=1'
CHECKSUM =
'f1df616cd60f81b91d7642933e9edd74dc6c486b2e546186a7c1e54c67dd32a5'
def
get_sponge_em_data(path: Union[os.PathLike, str], download: bool) -> Tuple[str, int]:
def get_sponge_em_data(path: Union[os.PathLike, str], download: bool) -> Tuple[str, int]:
    """Download the SpongeEM training data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.
        The number of downloaded volumes.
    """
    volume_pattern = os.path.join(path, "*.h5")
    n_volumes = len(glob(volume_pattern))

    # All three annotated volumes are already present: nothing to do.
    if n_volumes == 3:
        return path, n_volumes

    # Anything other than "no data yet" means a partial or corrupted state.
    if n_volumes != 0:
        raise RuntimeError(
            f"Invalid number of downloaded files in {path}. Please remove this folder and rerun this function."
        )

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "data.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    util.unzip(zip_path, path)

    n_volumes = len(glob(volume_pattern))
    assert n_volumes == 3
    return path, n_volumes
Download the SpongeEM training data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The path to the downloaded data. The number of downloaded volumes.
def
get_sponge_em_dataset( path: Union[os.PathLike, str], mode: str, patch_shape: Tuple[int, int, int], sample_ids: Optional[Sequence[int]] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_sponge_em_dataset(
    path: Union[os.PathLike, str],
    mode: str,
    patch_shape: Tuple[int, int, int],
    sample_ids: Optional[Sequence[int]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the SpongeEM dataset for the segmentation of structures in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        mode: Choose the segmentation task, either 'semantic' or 'instances'.
        patch_shape: The patch shape to use for training.
        sample_ids: The samples to load, valid ids are 1, 2 and 3.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert mode in ("semantic", "instances")
    data_folder, n_files = get_sponge_em_data(path, download)

    # Use every available volume unless a subset was requested.
    selected_ids = range(1, n_files + 1) if sample_ids is None else sample_ids
    volume_paths = [os.path.join(data_folder, f"train_data_0{sid}.h5") for sid in selected_ids]

    return torch_em.default_segmentation_dataset(
        volume_paths, "volumes/raw", volume_paths, f"volumes/labels/{mode}", patch_shape, **kwargs
    )
Get the SpongeEM dataset for the segmentation of structures in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- mode: Choose the segmentation task, either 'semantic' or 'instances'.
- patch_shape: The patch shape to use for training.
- sample_ids: The samples to load, valid ids are 1, 2 and 3.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_sponge_em_loader( path: Union[os.PathLike, str], mode: str, patch_shape: Tuple[int, int, int], batch_size: int, sample_ids: Optional[Sequence[int]] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_sponge_em_loader(
    path: Union[os.PathLike, str],
    mode: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample_ids: Optional[Sequence[int]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the SpongeEM dataloader for the segmentation of structures in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        mode: Choose the segmentation task, either 'semantic' or 'instances'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_ids: The samples to load, valid ids are 1, 2 and 3.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Route dataset-construction kwargs to the dataset and the rest to the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    ds = get_sponge_em_dataset(path, mode, patch_shape, sample_ids=sample_ids, download=download, **ds_kwargs)
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
Get the SpongeEM dataloader for the segmentation of structures in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- mode: Choose the segmentation task, either 'semantic' or 'instances'.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- sample_ids: The samples to load, valid ids are 1, 2 and 3.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.