torch_em.data.datasets.light_microscopy.embedseg_data

This dataset contains annotations for 3D fluorescence microscopy segmentation that were introduced by the EmbedSeg publication.

This dataset is from the publication https://proceedings.mlr.press/v143/lalit21a.html. Please cite it if you use this dataset in your research.

  1"""This dataset contains annotation for 3d fluorescence microscopy segmentation
  2that were introduced by the EmbedSeg publication.
  3
  4This dataset is from the publication https://proceedings.mlr.press/v143/lalit21a.html.
  5Please cite it if you use this dataset in your research.
  6"""
  7
  8import os
  9from glob import glob
 10from typing import Tuple, Union, List
 11
 12from torch.utils.data import Dataset, DataLoader
 13
 14import torch_em
 15
 16from .. import util
 17
 18
 19URLS = {
 20    "Mouse-Organoid-Cells-CBG": "https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Mouse-Organoid-Cells-CBG.zip",  # noqa
 21    "Mouse-Skull-Nuclei-CBG": "https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Mouse-Skull-Nuclei-CBG.zip",
 22    "Platynereis-ISH-Nuclei-CBG": "https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Platynereis-ISH-Nuclei-CBG.zip",  # noqa
 23    "Platynereis-Nuclei-CBG": "https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Platynereis-Nuclei-CBG.zip",
 24}
 25CHECKSUMS = {
 26    "Mouse-Organoid-Cells-CBG": "3695ac340473900ace8c37fd7f3ae0d37217de9f2b86c2341f36b1727825e48b",
 27    "Mouse-Skull-Nuclei-CBG": "3600ec261a48bf953820e0536cacd0bb8a5141be6e7435a4cb0fffeb0caf594e",
 28    "Platynereis-ISH-Nuclei-CBG": "bc9284df6f6d691a8e81b47310d95617252cc98ebf7daeab55801b330ba921e0",
 29    "Platynereis-Nuclei-CBG": "448cb7b46f2fe7d472795e05c8d7dfb40f259d94595ad2cfd256bc2aa4ab3be7",
 30}
 31
 32
 33def get_embedseg_data(path: Union[os.PathLike, str], name: str, download: bool) -> str:
 34    """Download the EmbedSeg training data.
 35
 36    Args:
 37        path: Filepath to a folder where the downloaded data will be saved.
 38        name: Name of the dataset to download.
 39        download: Whether to download the data if it is not present.
 40
 41    Returns:
 42        The filepath to the training data.
 43    """
 44    if name not in URLS:
 45        raise ValueError(f"The dataset name must be in {list(URLS.keys())}. You provided {name}.")
 46
 47    url = URLS[name]
 48    checksum = CHECKSUMS[name]
 49
 50    data_path = os.path.join(path, name)
 51    if os.path.exists(data_path):
 52        return data_path
 53
 54    os.makedirs(path, exist_ok=True)
 55    zip_path = os.path.join(path, f"{name}.zip")
 56    util.download_source(zip_path, url, download, checksum)
 57    util.unzip(zip_path, path, True)
 58
 59    return data_path
 60
 61
 62def get_embedseg_paths(
 63    path: Union[os.PathLike, str], name: str, split: str, download: bool = False
 64) -> Tuple[List[str], List[str]]:
 65    """Get paths to the EmbedSeg data.
 66
 67    Args:
 68        path: Filepath to a folder where the downloaded data will be saved.
 69        name: Name of the dataset to download.
 70        split: The split to use for the dataset.
 71        download: Whether to download the data if it is not present.
 72
 73    Returns:
 74        List of filepaths for the mage data.
 75        List of filepaths for the label data.
 76    """
 77    data_root = get_embedseg_data(path, name, download)
 78
 79    raw_paths = sorted(glob(os.path.join(data_root, split, "images", "*.tif")))
 80    label_paths = sorted(glob(os.path.join(data_root, split, "masks", "*.tif")))
 81    assert len(raw_paths) > 0
 82    assert len(raw_paths) == len(label_paths)
 83
 84    return raw_paths, label_paths
 85
 86
 87def get_embedseg_dataset(
 88    path: Union[os.PathLike, str],
 89    patch_shape: Tuple[int, int],
 90    name: str,
 91    split: str = "train",
 92    download: bool = False,
 93    **kwargs
 94) -> Dataset:
 95    """Get the EmbedSeg dataset for 3d fluorescence microscopy segmentation.
 96
 97    Args:
 98        path: Filepath to a folder where the downloaded data will be saved.
 99        patch_shape: The patch shape to use for training.
100        name: Name of the dataset to download.
101        split: The split to use for the dataset.
102        download: Whether to download the data if it is not present.
103        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
104
105    Returns:
106       The segmentation dataset.
107    """
108    raw_paths, label_paths = get_embedseg_paths(path, name, split, download)
109
110    return torch_em.default_segmentation_dataset(
111        raw_paths=raw_paths,
112        raw_key=None,
113        label_paths=label_paths,
114        label_key=None,
115        patch_shape=patch_shape,
116        **kwargs
117    )
118
119
def get_embedseg_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    batch_size: int,
    name: str,
    split: str = "train",
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the EmbedSeg dataloader for 3d fluorescence microscopy segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
            The data is 3d, so this is typically a shape with three entries.
        batch_size: The batch size for training.
        name: Name of the dataset to download.
        split: The split to use for the dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments of the dataset from the ones that are passed on to the data loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_embedseg_dataset(
        path, name=name, split=split, patch_shape=patch_shape, download=download, **ds_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **loader_kwargs)
# Download URL of each EmbedSeg dataset archive, keyed by dataset name.
# Every archive is an asset of the same GitHub release and is named after its dataset,
# so the URLs are derived from the dataset names.
URLS = {
    dataset: f"https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/{dataset}.zip"
    for dataset in (
        "Mouse-Organoid-Cells-CBG",
        "Mouse-Skull-Nuclei-CBG",
        "Platynereis-ISH-Nuclei-CBG",
        "Platynereis-Nuclei-CBG",
    )
}

# Expected checksum of each archive, keyed by the same dataset names as URLS.
CHECKSUMS = dict(
    [
        ("Mouse-Organoid-Cells-CBG", "3695ac340473900ace8c37fd7f3ae0d37217de9f2b86c2341f36b1727825e48b"),
        ("Mouse-Skull-Nuclei-CBG", "3600ec261a48bf953820e0536cacd0bb8a5141be6e7435a4cb0fffeb0caf594e"),
        ("Platynereis-ISH-Nuclei-CBG", "bc9284df6f6d691a8e81b47310d95617252cc98ebf7daeab55801b330ba921e0"),
        ("Platynereis-Nuclei-CBG", "448cb7b46f2fe7d472795e05c8d7dfb40f259d94595ad2cfd256bc2aa4ab3be7"),
    ]
)
def get_embedseg_data(path: Union[os.PathLike, str], name: str, download: bool) -> str:
    """Make sure one of the EmbedSeg datasets is available locally.

    If the folder `<path>/<name>` already exists it is returned right away.
    Otherwise the archive for `name` is fetched to `<path>/<name>.zip` and unpacked into `path`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: Name of the dataset to download. Must be one of the keys of `URLS`.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.

    Raises:
        ValueError: If `name` is not one of the known dataset names.
    """
    if name not in URLS:
        raise ValueError(f"The dataset name must be in {list(URLS.keys())}. You provided {name}.")

    source_url, expected_checksum = URLS[name], CHECKSUMS[name]
    extracted_dir = os.path.join(path, name)

    # Only fetch and unpack the archive when the dataset folder is missing.
    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)
        archive_path = os.path.join(path, f"{name}.zip")
        util.download_source(archive_path, source_url, download, expected_checksum)
        # NOTE(review): the third positional argument is presumably the "remove archive" flag - confirm in util.
        util.unzip(archive_path, path, True)

    return extracted_dir

Download the EmbedSeg training data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • name: Name of the dataset to download.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the training data.

def get_embedseg_paths(
    path: Union[os.PathLike, str], name: str, split: str, download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the EmbedSeg data.

    The images are collected from `<data_root>/<split>/images/*.tif` and the labels from
    `<data_root>/<split>/masks/*.tif`, where `data_root` is the folder returned by
    `get_embedseg_data`. Both lists are sorted.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: Name of the dataset to download.
        split: The split to use for the dataset. It is used as the name of the sub-folder of the dataset.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If no images are found for the split,
            or if the number of images and labels differs.
    """
    data_root = get_embedseg_data(path, name, download)

    image_pattern = os.path.join(data_root, split, "images", "*.tif")
    label_pattern = os.path.join(data_root, split, "masks", "*.tif")
    raw_paths = sorted(glob(image_pattern))
    label_paths = sorted(glob(label_pattern))

    # An unknown split name leads to an empty file list; say so instead of failing without a message.
    assert len(raw_paths) > 0, (
        f"Could not find any images matching '{image_pattern}'. Please check the split name '{split}'."
    )
    assert len(raw_paths) == len(label_paths), (
        f"Found {len(raw_paths)} images matching '{image_pattern}' "
        f"but {len(label_paths)} labels matching '{label_pattern}'."
    )

    return raw_paths, label_paths

Get paths to the EmbedSeg data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • name: Name of the dataset to download.
  • split: The split to use for the dataset.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_embedseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    name: str,
    split: str = "train",
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the EmbedSeg dataset for 3d fluorescence microscopy segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
            The data is 3d, so this is typically a shape with three entries.
        name: Name of the dataset to download.
        split: The split to use for the dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
       The segmentation dataset.
    """
    raw_paths, label_paths = get_embedseg_paths(path, name, split, download)

    # The images and labels are tif files, so no internal dataset keys are passed.
    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )

Get the EmbedSeg dataset for 3d fluorescence microscopy segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • name: Name of the dataset to download.
  • split: The split to use for the dataset.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_embedseg_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    batch_size: int,
    name: str,
    split: str = "train",
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the EmbedSeg dataloader for 3d fluorescence microscopy segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
            The data is 3d, so this is typically a shape with three entries.
        batch_size: The batch size for training.
        name: Name of the dataset to download.
        split: The split to use for the dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments of the dataset from the ones that are passed on to the data loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_embedseg_dataset(
        path, name=name, split=split, patch_shape=patch_shape, download=download, **ds_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **loader_kwargs)

Get the EmbedSeg dataloader for 3d fluorescence microscopy segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • name: Name of the dataset to download.
  • split: The split to use for the dataset.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.