torch_em.data.datasets.electron_microscopy.sponge_em

This dataset contains volume EM data of a sponge chamber with segmentation annotations for cells, cilia and microvilli.

It contains three annotated volumes. The dataset is part of the publication https://doi.org/10.1126/science.abj2949. Please cite this publication if you use the dataset in your research.

"""This dataset contains volume EM data of a sponge chamber with
segmentation annotations for cells, cilia and microvilli.

It contains three annotated volumes. The dataset is part of the publication
https://doi.org/10.1126/science.abj2949. Please cite this publication if you use the
dataset in your research.
"""
  8
  9import os
 10from glob import glob
 11from typing import Optional, Sequence, Tuple, Union
 12
 13import torch_em
 14from torch.utils.data import Dataset, DataLoader
 15from .. import util
 16
 17URL = "https://zenodo.org/record/8150818/files/sponge_em_train_data.zip?download=1"
 18CHECKSUM = "f1df616cd60f81b91d7642933e9edd74dc6c486b2e546186a7c1e54c67dd32a5"
 19
 20
 21def get_sponge_em_data(path: Union[os.PathLike, str], download: bool) -> Tuple[str, int]:
 22    """Download the SpongeEM training data.
 23
 24    Args:
 25        path: Filepath to a folder where the downloaded data will be saved.
 26        download: Whether to download the data if it is not present.
 27
 28    Returns:
 29        The path to the downloaded data.
 30        The number of downloaded volumes.
 31    """
 32    n_files = len(glob(os.path.join(path, "*.h5")))
 33    if n_files == 3:
 34        return path, n_files
 35    elif n_files == 0:
 36        pass
 37    else:
 38        raise RuntimeError(
 39            f"Invalid number of downloaded files in {path}. Please remove this folder and rerun this function."
 40        )
 41
 42    os.makedirs(path, exist_ok=True)
 43    zip_path = os.path.join(path, "data.zip")
 44    util.download_source(zip_path, URL, download, CHECKSUM)
 45    util.unzip(zip_path, path)
 46
 47    n_files = len(glob(os.path.join(path, "*.h5")))
 48    assert n_files == 3
 49    return path, n_files
 50
 51
 52def get_sponge_em_dataset(
 53    path: Union[os.PathLike, str],
 54    mode: str,
 55    patch_shape: Tuple[int, int, int],
 56    sample_ids: Optional[Sequence[int]] = None,
 57    download: bool = False,
 58    **kwargs
 59) -> Dataset:
 60    """Get the SpongeEM dataset for the segmentation of structures in EM.
 61
 62    Args:
 63        path: Filepath to a folder where the downloaded data will be saved.
 64        mode: Choose the segmentation task, either 'semantic' or 'instances'.
 65        patch_shape: The patch shape to use for training.
 66        sample_ids: The sample to download, valid ids are 1, 2 and 3.
 67        download: Whether to download the data if it is not present.
 68        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 69
 70    Returns:
 71       The segmentation dataset.
 72    """
 73
 74    assert mode in ("semantic", "instances")
 75    data_folder, n_files = get_sponge_em_data(path, download)
 76
 77    if sample_ids is None:
 78        sample_ids = range(1, n_files + 1)
 79    paths = [os.path.join(data_folder, f"train_data_0{i}.h5") for i in sample_ids]
 80
 81    raw_key = "volumes/raw"
 82    label_key = f"volumes/labels/{mode}"
 83    return torch_em.default_segmentation_dataset(paths, raw_key, paths, label_key, patch_shape, **kwargs)
 84
 85
 86def get_sponge_em_loader(
 87    path: Union[os.PathLike, str],
 88    mode: str,
 89    patch_shape: Tuple[int, int, int],
 90    batch_size: int,
 91    sample_ids: Optional[Sequence[int]] = None,
 92    download: bool = False,
 93    **kwargs
 94) -> DataLoader:
 95    """Get the SpongeEM dataloader for the segmentation of structures in EM.
 96
 97    Args:
 98        path: Filepath to a folder where the downloaded data will be saved.
 99        mode: Choose the segmentation task, either 'semantic' or 'instances'.
100        patch_shape: The patch shape to use for training.
101        batch_size: The batch size for training.
102        sample_ids: The sample to download, valid ids are 1, 2 and 3.
103        download: Whether to download the data if it is not present.
104        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
105        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
106
107    Returns:
108       The DataLoader.
109    """
110    ds_kwargs, loader_kwargs = util.split_kwargs(
111        torch_em.default_segmentation_dataset, **kwargs
112    )
113    ds = get_sponge_em_dataset(path, mode, patch_shape, sample_ids=sample_ids, download=download, **ds_kwargs)
114    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
URL = 'https://zenodo.org/record/8150818/files/sponge_em_train_data.zip?download=1'
CHECKSUM = 'f1df616cd60f81b91d7642933e9edd74dc6c486b2e546186a7c1e54c67dd32a5'
def get_sponge_em_data(path: Union[os.PathLike, str], download: bool) -> Tuple[str, int]:
22def get_sponge_em_data(path: Union[os.PathLike, str], download: bool) -> Tuple[str, int]:
23    """Download the SpongeEM training data.
24
25    Args:
26        path: Filepath to a folder where the downloaded data will be saved.
27        download: Whether to download the data if it is not present.
28
29    Returns:
30        The path to the downloaded data.
31        The number of downloaded volumes.
32    """
33    n_files = len(glob(os.path.join(path, "*.h5")))
34    if n_files == 3:
35        return path, n_files
36    elif n_files == 0:
37        pass
38    else:
39        raise RuntimeError(
40            f"Invalid number of downloaded files in {path}. Please remove this folder and rerun this function."
41        )
42
43    os.makedirs(path, exist_ok=True)
44    zip_path = os.path.join(path, "data.zip")
45    util.download_source(zip_path, URL, download, CHECKSUM)
46    util.unzip(zip_path, path)
47
48    n_files = len(glob(os.path.join(path, "*.h5")))
49    assert n_files == 3
50    return path, n_files

Download the SpongeEM training data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The path to the downloaded data. The number of downloaded volumes.

def get_sponge_em_dataset( path: Union[os.PathLike, str], mode: str, patch_shape: Tuple[int, int, int], sample_ids: Optional[Sequence[int]] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
53def get_sponge_em_dataset(
54    path: Union[os.PathLike, str],
55    mode: str,
56    patch_shape: Tuple[int, int, int],
57    sample_ids: Optional[Sequence[int]] = None,
58    download: bool = False,
59    **kwargs
60) -> Dataset:
61    """Get the SpongeEM dataset for the segmentation of structures in EM.
62
63    Args:
64        path: Filepath to a folder where the downloaded data will be saved.
65        mode: Choose the segmentation task, either 'semantic' or 'instances'.
66        patch_shape: The patch shape to use for training.
67        sample_ids: The sample to download, valid ids are 1, 2 and 3.
68        download: Whether to download the data if it is not present.
69        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
70
71    Returns:
72       The segmentation dataset.
73    """
74
75    assert mode in ("semantic", "instances")
76    data_folder, n_files = get_sponge_em_data(path, download)
77
78    if sample_ids is None:
79        sample_ids = range(1, n_files + 1)
80    paths = [os.path.join(data_folder, f"train_data_0{i}.h5") for i in sample_ids]
81
82    raw_key = "volumes/raw"
83    label_key = f"volumes/labels/{mode}"
84    return torch_em.default_segmentation_dataset(paths, raw_key, paths, label_key, patch_shape, **kwargs)

Get the SpongeEM dataset for the segmentation of structures in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • mode: Choose the segmentation task, either 'semantic' or 'instances'.
  • patch_shape: The patch shape to use for training.
  • sample_ids: The sample to download, valid ids are 1, 2 and 3.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_sponge_em_loader( path: Union[os.PathLike, str], mode: str, patch_shape: Tuple[int, int, int], batch_size: int, sample_ids: Optional[Sequence[int]] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
 87def get_sponge_em_loader(
 88    path: Union[os.PathLike, str],
 89    mode: str,
 90    patch_shape: Tuple[int, int, int],
 91    batch_size: int,
 92    sample_ids: Optional[Sequence[int]] = None,
 93    download: bool = False,
 94    **kwargs
 95) -> DataLoader:
 96    """Get the SpongeEM dataloader for the segmentation of structures in EM.
 97
 98    Args:
 99        path: Filepath to a folder where the downloaded data will be saved.
100        mode: Choose the segmentation task, either 'semantic' or 'instances'.
101        patch_shape: The patch shape to use for training.
102        batch_size: The batch size for training.
103        sample_ids: The sample to download, valid ids are 1, 2 and 3.
104        download: Whether to download the data if it is not present.
105        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
107
108    Returns:
109       The DataLoader.
110    """
111    ds_kwargs, loader_kwargs = util.split_kwargs(
112        torch_em.default_segmentation_dataset, **kwargs
113    )
114    ds = get_sponge_em_dataset(path, mode, patch_shape, sample_ids=sample_ids, download=download, **ds_kwargs)
115    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)

Get the SpongeEM dataloader for the segmentation of structures in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • mode: Choose the segmentation task, either 'semantic' or 'instances'.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • sample_ids: The sample to download, valid ids are 1, 2 and 3.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.