torch_em.data.datasets.light_microscopy.celegans_atlas

CElegans Atlas is a dataset that contains nucleus annotations in 3D confocal microscopy images.

The preprocessed dataset is located at https://zenodo.org/records/5942575. The raw images are from the publication https://doi.org/10.1038/nmeth.1366. The nucleus annotation masks were generated in the publication https://arxiv.org/abs/2002.02857. The available data splits were created in the publication https://arxiv.org/abs/1908.03636.

Please cite them all if you use this dataset for your research.

  1"""CElegans Atlas is a dataset that contains nucleus annotations in 3d confocal microscopy images.
  2
  3The preprocessed dataset is located at https://zenodo.org/records/5942575.
  4The raw images are from the publication https://doi.org/10.1038/nmeth.1366.
  5The nucleus annotation masks were generated in the publication https://arxiv.org/abs/2002.02857.
  6And the available data splits were made by the following publication https://arxiv.org/abs/1908.03636.
  7
  8Please cite them all if you use this dataset for your research.
  9"""
 10
 11import os
 12import shutil
 13from glob import glob
 14from natsort import natsorted
 15from typing import Union, Tuple, List, Literal
 16
 17from torch.utils.data import Dataset, DataLoader
 18
 19import torch_em
 20
 21from .. import util
 22
 23
 24URL = "https://zenodo.org/records/5942575/files/c_elegans_nuclei.zip"
 25CHECKSUM = "1def07491cdad89e381cbe4437ef03da3af8f78d127e8152cd9b32bdab152c4e"
 26
 27
 28def get_celegans_atlas_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 29    """Download the CElegans Atlas dataset.
 30
 31    Args:
 32        path: Filepath to a folder where the downloaded data will be saved.
 33        download: Whether to download the data if it is not present.
 34
 35    Returns:
 36        Filepath where the dataset is stored.
 37    """
 38    data_dir = os.path.join(path, "c_elegans_nuclei")
 39    if os.path.exists(data_dir):
 40        return data_dir
 41
 42    os.makedirs(path, exist_ok=True)
 43
 44    # Download and unzip the images.
 45    zip_path = os.path.join(path, "c_elegans_nuclei.zip")
 46    util.download_source(zip_path, url=URL, checksum=CHECKSUM, download=download)
 47    util.unzip(zip_path, path)
 48
 49    # Remove other miscellanous folders.
 50    shutil.rmtree(os.path.join(path, "__MACOSX"))
 51
 52    return data_dir
 53
 54
 55def get_celegans_atlas_paths(
 56    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool = False,
 57) -> Tuple[List[str], List[str]]:
 58    """Get paths to the CElegans Atlas data.
 59
 60    Args:
 61        path: Filepath to a folder where the downloaded data will be saved.
 62        split: The data split to use. Either 'train', 'val' or 'test'.
 63        download: Whether to download the data if it is not present.
 64
 65    Returns:
 66        List of filepaths for the image data.
 67        List of filepaths for the label data.
 68    """
 69    if split not in ["train", "val", "test"]:
 70        raise ValueError(f"'{split}' is not a valid data split choice.")
 71
 72    data_path = get_celegans_atlas_data(path, download)
 73
 74    raw_paths = natsorted(glob(os.path.join(data_path, split, "images", "*.tif")))
 75    label_paths = natsorted(glob(os.path.join(data_path, split, "masks", "*.tif")))
 76
 77    return raw_paths, label_paths
 78
 79
 80def get_celegans_atlas_dataset(
 81    path: Union[os.PathLike, str],
 82    patch_shape: Tuple[int, ...],
 83    split: Literal["train", "val", "test"],
 84    download: bool = False,
 85    **kwargs,
 86) -> Dataset:
 87    """Get the CElegans Atlas dataset for nucleus segmentation.
 88
 89    Args:
 90        path: Filepath to a folder where the downloaded data will be saved.
 91        patch_shape: The patch shape to use for training.
 92        split: The data split to use. Either 'train', 'val' or 'test'.
 93        download: Whether to download the data if it is not present.
 94        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 95
 96    Returns:
 97        The segmentation dataset.
 98    """
 99    raw_paths, label_paths = get_celegans_atlas_paths(path, split, download)
100
101    return torch_em.default_segmentation_dataset(
102        raw_paths=raw_paths,
103        raw_key=None,
104        label_paths=label_paths,
105        label_key=None,
106        patch_shape=patch_shape,
107        **kwargs,
108    )
109
110
def get_celegans_atlas_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal["train", "val", "test"],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the CElegans Atlas dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the ones meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    return torch_em.get_data_loader(
        get_celegans_atlas_dataset(path, patch_shape, split, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )
URL = 'https://zenodo.org/records/5942575/files/c_elegans_nuclei.zip'
CHECKSUM = '1def07491cdad89e381cbe4437ef03da3af8f78d127e8152cd9b32bdab152c4e'
def get_celegans_atlas_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_celegans_atlas_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the CElegans Atlas dataset.

    The data is expected in the folder `<path>/c_elegans_nuclei`. If that folder
    already exists, it is returned right away and nothing is downloaded or extracted.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the dataset is stored.
    """
    data_dir = os.path.join(path, "c_elegans_nuclei")
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    # Download and unzip the images.
    zip_path = os.path.join(path, "c_elegans_nuclei.zip")
    util.download_source(zip_path, url=URL, checksum=CHECKSUM, download=download)
    util.unzip(zip_path, path)

    # Remove the miscellaneous "__MACOSX" folder that may be extracted alongside the data.
    # The removal is guarded so that a missing folder does not raise a FileNotFoundError
    # after an otherwise successful download.
    macosx_dir = os.path.join(path, "__MACOSX")
    if os.path.isdir(macosx_dir):
        shutil.rmtree(macosx_dir)

    return data_dir

Download the CElegans Atlas dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the dataset is stored.

def get_celegans_atlas_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]:
def get_celegans_atlas_paths(
    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Collect the image and label filepaths for one split of the CElegans Atlas data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of 'train', 'val' or 'test'.
    """
    valid_splits = ("train", "val", "test")
    if split not in valid_splits:
        raise ValueError(f"'{split}' is not a valid data split choice.")

    # The split is validated before the data is fetched, so an invalid split never triggers a download.
    split_dir = os.path.join(get_celegans_atlas_data(path, download), split)

    def _sorted_tifs(subfolder):
        # All tif files of the given subfolder in natural sort order.
        return natsorted(glob(os.path.join(split_dir, subfolder, "*.tif")))

    image_files = _sorted_tifs("images")
    mask_files = _sorted_tifs("masks")
    return image_files, mask_files

Get paths to the CElegans Atlas data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_celegans_atlas_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 81def get_celegans_atlas_dataset(
 82    path: Union[os.PathLike, str],
 83    patch_shape: Tuple[int, ...],
 84    split: Literal["train", "val", "test"],
 85    download: bool = False,
 86    **kwargs,
 87) -> Dataset:
 88    """Get the CElegans Atlas dataset for nucleus segmentation.
 89
 90    Args:
 91        path: Filepath to a folder where the downloaded data will be saved.
 92        patch_shape: The patch shape to use for training.
 93        split: The data split to use. Either 'train', 'val' or 'test'.
 94        download: Whether to download the data if it is not present.
 95        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 96
 97    Returns:
 98        The segmentation dataset.
 99    """
100    raw_paths, label_paths = get_celegans_atlas_paths(path, split, download)
101
102    return torch_em.default_segmentation_dataset(
103        raw_paths=raw_paths,
104        raw_key=None,
105        label_paths=label_paths,
106        label_key=None,
107        patch_shape=patch_shape,
108        **kwargs,
109    )

Get the CElegans Atlas dataset for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_celegans_atlas_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_celegans_atlas_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal["train", "val", "test"],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the CElegans Atlas dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the ones meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    return torch_em.get_data_loader(
        get_celegans_atlas_dataset(path, patch_shape, split, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )

Get the CElegans Atlas dataloader for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.