torch_em.data.datasets.light_microscopy.organoidnet

The OrganoIDNet dataset contains annotations of pancreatic organoids.

This dataset is from the publication https://doi.org/10.1007/s13402-024-00958-2. Please cite it if you use this dataset for a publication.

  1"""The OrganoIDNet dataset contains annotations of pancreatic organoids.
  2
  3This dataset is from the publication https://doi.org/10.1007/s13402-024-00958-2.
  4Please cite it if you use this dataset for a publication.
  5"""
  6
  7
  8import os
  9import shutil
 10import zipfile
 11from glob import glob
 12from typing import Tuple, Union, List
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
# Download link for the zipped dataset archive (hosted on Zenodo).
URL = "https://zenodo.org/records/10643410/files/OrganoIDNetData.zip?download=1"
# Expected checksum of the archive; it is handed to `util.download_source` together with the URL.
# NOTE(review): 64 hex digits, presumably SHA-256 - confirm against `util.download_source`.
CHECKSUM = "3cd9239bf74bda096ecb5b7bdb95f800c7fa30b9937f9aba6ddf98d754cbfa3d"
 23
 24
 25def get_organoidnet_data(path: Union[os.PathLike, str], split: str, download: bool = False) -> str:
 26    """Download the OrganoIDNet dataset.
 27
 28    Args:
 29        path: Filepath to the folder where the downloaded data will be saved.
 30        split: The data split to use.
 31        download: Whether to download the data if it is not present.
 32
 33    Returns:
 34        The filepath where the data is downloaded.
 35    """
 36    splits = ["Training", "Validation", "Test"]
 37    assert split in splits
 38
 39    os.makedirs(path, exist_ok=True)
 40
 41    data_dir = os.path.join(path, split)
 42    if os.path.exists(data_dir):
 43        return data_dir
 44
 45    # Download and extraction.
 46    zip_path = os.path.join(path, "OrganoIDNetData.zip")
 47    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 48
 49    # Only "Training", "Test", "Validation" from the zip are relevant and need to be extracted.
 50    # They are in "/OrganoIDNetData/Dataset/"
 51    prefix = "OrganoIDNetData/Dataset/"
 52    for dl_split in splits:
 53
 54        dl_prefix = prefix + dl_split
 55
 56        with zipfile.ZipFile(zip_path) as archive:
 57            for ff in archive.namelist():
 58                if ff.startswith(dl_prefix):
 59                    archive.extract(ff, path)
 60
 61    for dl_split in splits:
 62        shutil.move(
 63            os.path.join(path, "OrganoIDNetData/Dataset", dl_split),
 64            os.path.join(path, dl_split)
 65        )
 66
 67    assert os.path.exists(data_dir)
 68
 69    # os.remove(zip_path)
 70    return data_dir
 71
 72
 73def get_organoidnet_paths(
 74    path: Union[os.PathLike, str], split: str, download: bool = False
 75) -> Tuple[List[str], List[str]]:
 76    """Get paths to the OrganoIDNet data.
 77
 78    Args:
 79        path: Filepath to the folder where the downloaded data will be saved.
 80        split: The data split to use.
 81        download: Whether to download the data if it is not present.
 82
 83    Returns:
 84        List of filepaths for the image data.
 85        List of filepaths for the label data.
 86    """
 87    data_dir = get_organoidnet_data(path=path, split=split, download=download)
 88
 89    image_paths = sorted(glob(os.path.join(data_dir, "Images", "*.tif")))
 90    label_paths = sorted(glob(os.path.join(data_dir, "Masks", "*.tif")))
 91
 92    return image_paths, label_paths
 93
 94
 95def get_organoidnet_dataset(
 96    path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], download: bool = False, **kwargs
 97) -> Dataset:
 98    """Get the OrganoIDNet dataset for organoid segmentation in microscopy images.
 99
100    Args:
101        path: Filepath to a folder where the downloaded data will be saved.
102        split: The data split to use.
103        patch_shape: The patch shape to use for training.
104        download: Whether to download the data if it is not present.
105        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
106
107    Returns:
108        The segmentation dataset.
109    """
110    image_paths, label_paths = get_organoidnet_paths(path, split, download)
111
112    return torch_em.default_segmentation_dataset(
113        raw_paths=image_paths,
114        raw_key=None,
115        label_paths=label_paths,
116        label_key=None,
117        patch_shape=patch_shape,
118        is_seg_dataset=False,
119        **kwargs
120    )
121
122
def get_organoidnet_loader(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the OrganoIDNet dataloader for organoid segmentation in microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones,
    # which are forwarded to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    organoid_dataset = get_organoidnet_dataset(
        path=path,
        split=split,
        patch_shape=patch_shape,
        download=download,
        **dataset_kwargs
    )

    loader = torch_em.get_data_loader(dataset=organoid_dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader
# Download link for the zipped dataset archive (hosted on Zenodo).
URL = 'https://zenodo.org/records/10643410/files/OrganoIDNetData.zip?download=1'
# Expected checksum of the archive; it is handed to `util.download_source` together with the URL.
# NOTE(review): 64 hex digits, presumably SHA-256 - confirm against `util.download_source`.
CHECKSUM = '3cd9239bf74bda096ecb5b7bdb95f800c7fa30b9937f9aba6ddf98d754cbfa3d'
def get_organoidnet_data(path: Union[os.PathLike, str], split: str, download: bool = False) -> str:
def get_organoidnet_data(path: Union[os.PathLike, str], split: str, download: bool = False) -> str:
    """Download and unpack the OrganoIDNet dataset.

    The zip archive is stored in `path` and the "Training", "Validation" and "Test" folders
    it contains are placed directly inside `path`. If the folder of the requested split
    already exists, it is returned right away and nothing is downloaded or extracted.

    Args:
        path: Filepath to the folder where the downloaded data will be saved.
        split: The data split to use. One of "Training", "Validation" or "Test".
        download: Whether to download the data if it is not present.

    Returns:
        The filepath where the data of the requested split is stored.

    Raises:
        ValueError: If `split` is not one of the supported splits.
        RuntimeError: If the folder of the requested split is missing after the extraction.
    """
    splits = ["Training", "Validation", "Test"]
    # Explicit exception instead of `assert`, so the check also runs under `python -O`.
    if split not in splits:
        raise ValueError(f"Invalid split '{split}'. Choose one of {splits}.")

    os.makedirs(path, exist_ok=True)

    data_dir = os.path.join(path, split)
    if os.path.exists(data_dir):
        return data_dir

    # Download and extraction.
    zip_path = os.path.join(path, "OrganoIDNetData.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)

    # Only handle the splits that are not in place yet: `shutil.move` onto an existing
    # directory would move the source *into* it (e.g. "Test/Test") instead of replacing it.
    missing_splits = [name for name in splits if not os.path.exists(os.path.join(path, name))]

    # Only "Training", "Test", "Validation" from the zip are relevant and need to be extracted.
    # They are in "/OrganoIDNetData/Dataset/"
    archive_root = "OrganoIDNetData/Dataset"
    # The trailing "/" restricts the match to the split folder itself and its contents.
    wanted_prefixes = tuple(f"{archive_root}/{name}/" for name in missing_splits)

    # Open the archive once and select all relevant members in a single pass.
    with zipfile.ZipFile(zip_path) as archive:
        members = [member for member in archive.namelist() if member.startswith(wanted_prefixes)]
        archive.extractall(path, members=members)

    # Lift the split folders out of the nested archive layout into `path`.
    for name in missing_splits:
        shutil.move(os.path.join(path, archive_root, name), os.path.join(path, name))

    if not os.path.exists(data_dir):
        raise RuntimeError(f"The folder '{data_dir}' is missing after extracting '{zip_path}'.")

    # NOTE: The zip archive is left in place after the extraction.
    return data_dir

Download the OrganoIDNet dataset.

Arguments:
  • path: Filepath to the folder where the downloaded data will be saved.
  • split: The data split to use.
  • download: Whether to download the data if it is not present.
Returns:

The filepath where the data is downloaded.

def get_organoidnet_paths( path: Union[os.PathLike, str], split: str, download: bool = False) -> Tuple[List[str], List[str]]:
def get_organoidnet_paths(
    path: Union[os.PathLike, str], split: str, download: bool = False
) -> Tuple[List[str], List[str]]:
    """Collect the image and label filepaths for one split of the OrganoIDNet data.

    Args:
        path: Filepath to the folder where the downloaded data will be saved.
        split: The data split to use.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    split_dir = get_organoidnet_data(path=path, split=split, download=download)

    def _sorted_tifs(subfolder: str) -> List[str]:
        # All tif files of the given subfolder of the split, in sorted order.
        pattern = os.path.join(split_dir, subfolder, "*.tif")
        return sorted(glob(pattern))

    return _sorted_tifs("Images"), _sorted_tifs("Masks")

Get paths to the OrganoIDNet data.

Arguments:
  • path: Filepath to the folder where the downloaded data will be saved.
  • split: The data split to use.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_organoidnet_dataset( path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 96def get_organoidnet_dataset(
 97    path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], download: bool = False, **kwargs
 98) -> Dataset:
 99    """Get the OrganoIDNet dataset for organoid segmentation in microscopy images.
100
101    Args:
102        path: Filepath to a folder where the downloaded data will be saved.
103        split: The data split to use.
104        patch_shape: The patch shape to use for training.
105        download: Whether to download the data if it is not present.
106        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
107
108    Returns:
109        The segmentation dataset.
110    """
111    image_paths, label_paths = get_organoidnet_paths(path, split, download)
112
113    return torch_em.default_segmentation_dataset(
114        raw_paths=image_paths,
115        raw_key=None,
116        label_paths=label_paths,
117        label_key=None,
118        patch_shape=patch_shape,
119        is_seg_dataset=False,
120        **kwargs
121    )

Get the OrganoIDNet dataset for organoid segmentation in microscopy images.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The data split to use.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_organoidnet_loader( path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], batch_size: int, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_organoidnet_loader(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the OrganoIDNet dataloader for organoid segmentation in microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones,
    # which are forwarded to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    organoid_dataset = get_organoidnet_dataset(
        path=path,
        split=split,
        patch_shape=patch_shape,
        download=download,
        **dataset_kwargs
    )

    loader = torch_em.get_data_loader(dataset=organoid_dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader

Get the OrganoIDNet dataloader for organoid segmentation in microscopy images.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The data split to use.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.