torch_em.data.datasets.electron_microscopy.sponge_em

This dataset contains volume EM data of a sponge chamber with segmentation annotations for cells, cilia and microvilli.

It contains three annotated volumes. The dataset is part of the publication https://doi.org/10.1126/science.abj2949. Please cite this publication if you use the dataset in your research.

"""This dataset contains volume EM data of a sponge chamber with
segmentation annotations for cells, cilia and microvilli.

It contains three annotated volumes. The dataset is part of the publication
https://doi.org/10.1126/science.abj2949. Please cite this publication if you use the
dataset in your research.
"""
  8
  9import os
 10from glob import glob
 11from typing import Optional, Sequence, Tuple, Union
 12
 13import torch_em
 14from torch.utils.data import Dataset, DataLoader
 15from .. import util
 16
 17URL = "https://zenodo.org/record/8150818/files/sponge_em_train_data.zip?download=1"
 18CHECKSUM = "f1df616cd60f81b91d7642933e9edd74dc6c486b2e546186a7c1e54c67dd32a5"
 19
 20
 21def get_sponge_em_data(path: Union[os.PathLike, str], download: bool) -> Tuple[str, int]:
 22    """Download the SpongeEM training data.
 23
 24    Args:
 25        path: Filepath to a folder where the downloaded data will be saved.
 26        download: Whether to download the data if it is not present.
 27
 28    Returns:
 29        The path to the downloaded data.
 30        The number of downloaded volumes.
 31    """
 32    n_files = len(glob(os.path.join(path, "*.h5")))
 33    if n_files == 3:
 34        return path, n_files
 35    elif n_files == 0:
 36        pass
 37    else:
 38        raise RuntimeError(
 39            f"Invalid number of downloaded files in {path}. Please remove this folder and rerun this function."
 40        )
 41
 42    os.makedirs(path, exist_ok=True)
 43    zip_path = os.path.join(path, "data.zip")
 44    util.download_source(zip_path, URL, download, CHECKSUM)
 45    util.unzip(zip_path, path)
 46
 47    n_files = len(glob(os.path.join(path, "*.h5")))
 48    assert n_files == 3
 49    return path, n_files
 50
 51
 52def get_sponge_em_dataset(
 53    path: Union[os.PathLike, str],
 54    mode: str,
 55    patch_shape: Tuple[int, int, int],
 56    sample_ids: Optional[Sequence[int]] = None,
 57    download: bool = False,
 58    **kwargs
 59) -> Dataset:
 60    """Get the SpongeEM dataset for the segmentation of structures in EM.
 61
 62    Args:
 63        path: Filepath to a folder where the downloaded data will be saved.
 64        mode: Choose the segmentation task, either 'semantic' or 'instances'.
 65        patch_shape: The patch shape to use for training.
 66        sample_ids: The sample to download, valid ids are 1, 2 and 3.
 67        download: Whether to download the data if it is not present.
 68        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 69
 70    Returns:
 71       The segmentation dataset.
 72    """
 73
 74    assert mode in ("semantic", "instances")
 75    data_folder, n_files = get_sponge_em_data(path, download)
 76
 77    if sample_ids is None:
 78        sample_ids = range(1, n_files + 1)
 79    paths = [os.path.join(data_folder, f"train_data_0{i}.h5") for i in sample_ids]
 80
 81    raw_key = "volumes/raw"
 82    label_key = f"volumes/labels/{mode}"
 83    return torch_em.default_segmentation_dataset(paths, raw_key, paths, label_key, patch_shape, **kwargs)
 84
 85
 86def get_sponge_em_loader(
 87    path: Union[os.PathLike, str],
 88    mode: str,
 89    patch_shape: Tuple[int, int, int],
 90    batch_size: int,
 91    sample_ids: Optional[Sequence[int]] = None,
 92    download: bool = False,
 93    **kwargs
 94) -> DataLoader:
 95    """Get the SpongeEM dataloader for the segmentation of structures in EM.
 96
 97    Args:
 98        path: Filepath to a folder where the downloaded data will be saved.
 99        mode: Choose the segmentation task, either 'semantic' or 'instances'.
100        patch_shape: The patch shape to use for training.
101        batch_size: The batch size for training.
102        sample_ids: The sample to download, valid ids are 1, 2 and 3.
103        download: Whether to download the data if it is not present.
104        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
105        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
106
107    Returns:
108       The DataLoader.
109    """
110    ds_kwargs, loader_kwargs = util.split_kwargs(
111        torch_em.default_segmentation_dataset, **kwargs
112    )
113    ds = get_sponge_em_dataset(path, mode, patch_shape, sample_ids=sample_ids, download=download, **ds_kwargs)
114    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
URL = 'https://zenodo.org/record/8150818/files/sponge_em_train_data.zip?download=1'
CHECKSUM = 'f1df616cd60f81b91d7642933e9edd74dc6c486b2e546186a7c1e54c67dd32a5'
def get_sponge_em_data(path: Union[os.PathLike, str], download: bool) -> Tuple[str, int]:
22def get_sponge_em_data(path: Union[os.PathLike, str], download: bool) -> Tuple[str, int]:
23    """Download the SpongeEM training data.
24
25    Args:
26        path: Filepath to a folder where the downloaded data will be saved.
27        download: Whether to download the data if it is not present.
28
29    Returns:
30        The path to the downloaded data.
31        The number of downloaded volumes.
32    """
33    n_files = len(glob(os.path.join(path, "*.h5")))
34    if n_files == 3:
35        return path, n_files
36    elif n_files == 0:
37        pass
38    else:
39        raise RuntimeError(
40            f"Invalid number of downloaded files in {path}. Please remove this folder and rerun this function."
41        )
42
43    os.makedirs(path, exist_ok=True)
44    zip_path = os.path.join(path, "data.zip")
45    util.download_source(zip_path, URL, download, CHECKSUM)
46    util.unzip(zip_path, path)
47
48    n_files = len(glob(os.path.join(path, "*.h5")))
49    assert n_files == 3
50    return path, n_files

Download the SpongeEM training data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The path to the downloaded data. The number of downloaded volumes.

def get_sponge_em_dataset( path: Union[os.PathLike, str], mode: str, patch_shape: Tuple[int, int, int], sample_ids: Optional[Sequence[int]] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
53def get_sponge_em_dataset(
54    path: Union[os.PathLike, str],
55    mode: str,
56    patch_shape: Tuple[int, int, int],
57    sample_ids: Optional[Sequence[int]] = None,
58    download: bool = False,
59    **kwargs
60) -> Dataset:
61    """Get the SpongeEM dataset for the segmentation of structures in EM.
62
63    Args:
64        path: Filepath to a folder where the downloaded data will be saved.
65        mode: Choose the segmentation task, either 'semantic' or 'instances'.
66        patch_shape: The patch shape to use for training.
67        sample_ids: The sample to download, valid ids are 1, 2 and 3.
68        download: Whether to download the data if it is not present.
69        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
70
71    Returns:
72       The segmentation dataset.
73    """
74
75    assert mode in ("semantic", "instances")
76    data_folder, n_files = get_sponge_em_data(path, download)
77
78    if sample_ids is None:
79        sample_ids = range(1, n_files + 1)
80    paths = [os.path.join(data_folder, f"train_data_0{i}.h5") for i in sample_ids]
81
82    raw_key = "volumes/raw"
83    label_key = f"volumes/labels/{mode}"
84    return torch_em.default_segmentation_dataset(paths, raw_key, paths, label_key, patch_shape, **kwargs)

Get the SpongeEM dataset for the segmentation of structures in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • mode: Choose the segmentation task, either 'semantic' or 'instances'.
  • patch_shape: The patch shape to use for training.
  • sample_ids: The sample to download, valid ids are 1, 2 and 3.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_sponge_em_loader( path: Union[os.PathLike, str], mode: str, patch_shape: Tuple[int, int, int], batch_size: int, sample_ids: Optional[Sequence[int]] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
 87def get_sponge_em_loader(
 88    path: Union[os.PathLike, str],
 89    mode: str,
 90    patch_shape: Tuple[int, int, int],
 91    batch_size: int,
 92    sample_ids: Optional[Sequence[int]] = None,
 93    download: bool = False,
 94    **kwargs
 95) -> DataLoader:
 96    """Get the SpongeEM dataloader for the segmentation of structures in EM.
 97
 98    Args:
 99        path: Filepath to a folder where the downloaded data will be saved.
100        mode: Choose the segmentation task, either 'semantic' or 'instances'.
101        patch_shape: The patch shape to use for training.
102        batch_size: The batch size for training.
103        sample_ids: The sample to download, valid ids are 1, 2 and 3.
104        download: Whether to download the data if it is not present.
105        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
107
108    Returns:
109       The DataLoader.
110    """
111    ds_kwargs, loader_kwargs = util.split_kwargs(
112        torch_em.default_segmentation_dataset, **kwargs
113    )
114    ds = get_sponge_em_dataset(path, mode, patch_shape, sample_ids=sample_ids, download=download, **ds_kwargs)
115    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)

Get the SpongeEM dataloader for the segmentation of structures in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • mode: Choose the segmentation task, either 'semantic' or 'instances'.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • sample_ids: The sample to download, valid ids are 1, 2 and 3.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.