torch_em.data.datasets.light_microscopy.orgasegment

The OrgaSegment dataset contains annotations for organoid segmentation of intestinal patient-derived organoids in bright-field images.

This dataset is from the publication https://doi.org/10.1038/s42003-024-05966-4. Please cite it if you use this dataset for your research.

View Source

  1"""The OrgaSegment dataset contains annotations for organoid segmentation
  2of intestinal patient-derived organoids in bright-field images.
  3
  4This dataset is from the publication https://doi.org/10.1038/s42003-024-05966-4.
  5Please cite it if you use this dataset for your research.
  6"""
  7
  8import os
  9import shutil
 10from glob import glob
 11from typing import Tuple, Union, Literal, List
 12
 13from torch.utils.data import Dataset, DataLoader
 14
 15import torch_em
 16
 17from .. import util
 18
 19
 20URL = "https://zenodo.org/records/10278229/files/OrganoidBasic_v20211206.zip"
 21CHECKSUM = "d067124d734108e46e18f65daaf17c89cb0a40bdacc6f6031815a6839e472798"
 22
 23
 24def get_orgasegment_data(
 25    path: Union[os.PathLike, str], split: Literal["train", "val", "eval"], download: bool = False
 26) -> str:
 27    """Download the OrgaSegment dataset for organoid segmentation.
 28
 29    Args:
 30        path: Filepath to a folder where the downloaded data will be saved.
 31        split: The split to download. Either 'train', 'val' or 'eval'.
 32        download: Whether to download the data if it is not present.
 33
 34    Returns:
 35        The filepath to the folder with the data of the requested split.
 36    """
 37    os.makedirs(path, exist_ok=True)
 38
 39    data_dir = os.path.join(path, split)
 40    if os.path.exists(data_dir):
 41        return data_dir
 42
 43    zip_path = os.path.join(path, "OrganoidBasic_v20211206.zip")
 44    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 45    util.unzip(zip_path=zip_path, dst=path, remove=True)
 46
 47    shutil.move(os.path.join(path, "OrganoidBasic_v20211206", "train"), os.path.join(path, "train"))
 48    shutil.move(os.path.join(path, "OrganoidBasic_v20211206", "val"), os.path.join(path, "val"))
 49    shutil.move(os.path.join(path, "OrganoidBasic_v20211206", "eval"), os.path.join(path, "eval"))
 50    shutil.rmtree(os.path.join(path, "OrganoidBasic_v20211206"))
 51
 52    return data_dir
 53
 54
 55def get_orgasegment_paths(
 56    path: Union[os.PathLike, str], split: Literal["train", "val", "eval"], download: bool = False
 57) -> Tuple[List[str], List[str]]:
 58    """Get paths for the OrgaSegment data.
 59
 60    Args:
 61        path: Filepath to a folder where the downloaded data will be saved.
 62        split: The split to download. Either 'train', 'val' or 'eval'.
 63        download: Whether to download the data if it is not present.
 64
 65    Returns:
 66        List of filepaths to the image data.
 67        List of filepaths to the label data.
 68    """
 69    data_dir = get_orgasegment_data(path=path, split=split, download=download)
 70
 71    image_paths = sorted(glob(os.path.join(data_dir, "*_img.jpg")))
 72    label_paths = sorted(glob(os.path.join(data_dir, "*_masks_organoid.png")))
 73
 74    return image_paths, label_paths
 75
 76
 77def get_orgasegment_dataset(
 78    path: Union[os.PathLike, str],
 79    patch_shape: Tuple[int, int],
 80    split: Literal["train", "val", "eval"],
 81    boundaries: bool = False,
 82    binary: bool = False,
 83    download: bool = False,
 84    **kwargs
 85) -> Dataset:
 86    """Get the OrgaSegment dataset for organoid segmentation.
 87
 88    Args:
 89        path: Filepath to a folder where the downloaded data will be saved.
 90        patch_shape: The patch shape to use for training.
 91        split: The split to download. Either 'train', 'val' or 'eval'.
 92        boundaries: Whether to compute boundaries as the target.
 93        binary: Whether to use a binary segmentation target.
 94        download: Whether to download the data if it is not present.
 95        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 96
 97    Returns:
 98        The segmentation dataset.
 99    """
100    assert split in ["train", "val", "eval"]
101
102    image_paths, label_paths = get_orgasegment_paths(path, split, download)
103
104    kwargs, _ = util.add_instance_label_transform(kwargs, add_binary_target=True, binary=binary, boundaries=boundaries)
105
106    return torch_em.default_segmentation_dataset(
107        raw_paths=image_paths,
108        raw_key=None,
109        label_paths=label_paths,
110        label_key=None,
111        patch_shape=patch_shape,
112        is_seg_dataset=False,
113        **kwargs
114    )
115
116
117def get_orgasegment_loader(
118    path: Union[os.PathLike, str],
119    batch_size: int,
120    patch_shape: Tuple[int, int],
121    split: Literal["train", "val", "eval"],
122    boundaries: bool = False,
123    binary: bool = False,
124    download: bool = False,
125    **kwargs
126) -> DataLoader:
127    """Get the OrgaSegment dataloader for organoid segmentation.
128
129    Args:
130        path: Filepath to a folder where the downloaded data will be saved.
131        batch_size: The batch size for training.
132        patch_shape: The patch shape to use for training.
133        split: The split to download. Either 'train', 'val' or 'eval'.
134        boundaries: Whether to compute boundaries as the target.
135        binary: Whether to use a binary segmentation target.
136        download: Whether to download the data if it is not present.
137        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
138
139    Returns:
140        The DataLoader.
141    """
142    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
143    dataset = get_orgasegment_dataset(path, patch_shape, split, boundaries, binary, download, **ds_kwargs)
144    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)

URL = 'https://zenodo.org/records/10278229/files/OrganoidBasic_v20211206.zip'

CHECKSUM = 'd067124d734108e46e18f65daaf17c89cb0a40bdacc6f6031815a6839e472798'

def get_orgasegment_data( path: Union[os.PathLike, str], split: Literal['train', 'val', 'eval'], download: bool = False) -> str: View Source

25def get_orgasegment_data(
26    path: Union[os.PathLike, str], split: Literal["train", "val", "eval"], download: bool = False
27) -> str:
28    """Download the OrgaSegment dataset for organoid segmentation.
29
30    Args:
31        path: Filepath to a folder where the downloaded data will be saved.
32        split: The split to download. Either 'train', 'val' or 'eval'.
33        download: Whether to download the data if it is not present.
34
35    Returns:
36        The filepath to the folder with the data of the requested split.
37    """
38    os.makedirs(path, exist_ok=True)
39
40    data_dir = os.path.join(path, split)
41    if os.path.exists(data_dir):
42        return data_dir
43
44    zip_path = os.path.join(path, "OrganoidBasic_v20211206.zip")
45    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
46    util.unzip(zip_path=zip_path, dst=path, remove=True)
47
48    shutil.move(os.path.join(path, "OrganoidBasic_v20211206", "train"), os.path.join(path, "train"))
49    shutil.move(os.path.join(path, "OrganoidBasic_v20211206", "val"), os.path.join(path, "val"))
50    shutil.move(os.path.join(path, "OrganoidBasic_v20211206", "eval"), os.path.join(path, "eval"))
51    shutil.rmtree(os.path.join(path, "OrganoidBasic_v20211206"))
52
53    return data_dir

Download the OrgaSegment dataset for organoid segmentation.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
split: The split to download. Either 'train', 'val' or 'eval'.
download: Whether to download the data if it is not present.

Returns:

The filepath to the folder with the data of the requested split.

def get_orgasegment_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'eval'], download: bool = False) -> Tuple[List[str], List[str]]: View Source

56def get_orgasegment_paths(
57    path: Union[os.PathLike, str], split: Literal["train", "val", "eval"], download: bool = False
58) -> Tuple[List[str], List[str]]:
59    """Get paths for the OrgaSegment data.
60
61    Args:
62        path: Filepath to a folder where the downloaded data will be saved.
63        split: The split to download. Either 'train', 'val' or 'eval'.
64        download: Whether to download the data if it is not present.
65
66    Returns:
67        List of filepaths to the image data.
68        List of filepaths to the label data.
69    """
70    data_dir = get_orgasegment_data(path=path, split=split, download=download)
71
72    image_paths = sorted(glob(os.path.join(data_dir, "*_img.jpg")))
73    label_paths = sorted(glob(os.path.join(data_dir, "*_masks_organoid.png")))
74
75    return image_paths, label_paths

Get paths for the OrgaSegment data.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
split: The split to download. Either 'train', 'val' or 'eval'.
download: Whether to download the data if it is not present.

Returns:

List of filepaths to the image data. List of filepaths to the label data.

def get_orgasegment_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'eval'], boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

 78def get_orgasegment_dataset(
 79    path: Union[os.PathLike, str],
 80    patch_shape: Tuple[int, int],
 81    split: Literal["train", "val", "eval"],
 82    boundaries: bool = False,
 83    binary: bool = False,
 84    download: bool = False,
 85    **kwargs
 86) -> Dataset:
 87    """Get the OrgaSegment dataset for organoid segmentation.
 88
 89    Args:
 90        path: Filepath to a folder where the downloaded data will be saved.
 91        patch_shape: The patch shape to use for training.
 92        split: The split to download. Either 'train', 'val' or 'eval'.
 93        boundaries: Whether to compute boundaries as the target.
 94        binary: Whether to use a binary segmentation target.
 95        download: Whether to download the data if it is not present.
 96        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 97
 98    Returns:
 99        The segmentation dataset.
100    """
101    assert split in ["train", "val", "eval"]
102
103    image_paths, label_paths = get_orgasegment_paths(path, split, download)
104
105    kwargs, _ = util.add_instance_label_transform(kwargs, add_binary_target=True, binary=binary, boundaries=boundaries)
106
107    return torch_em.default_segmentation_dataset(
108        raw_paths=image_paths,
109        raw_key=None,
110        label_paths=label_paths,
111        label_key=None,
112        patch_shape=patch_shape,
113        is_seg_dataset=False,
114        **kwargs
115    )

Get the OrgaSegment dataset for organoid segmentation.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
patch_shape: The patch shape to use for training.
split: The split to download. Either 'train', 'val' or 'eval'.
boundaries: Whether to compute boundaries as the target.
binary: Whether to use a binary segmentation target.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_orgasegment_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'eval'], boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

118def get_orgasegment_loader(
119    path: Union[os.PathLike, str],
120    batch_size: int,
121    patch_shape: Tuple[int, int],
122    split: Literal["train", "val", "eval"],
123    boundaries: bool = False,
124    binary: bool = False,
125    download: bool = False,
126    **kwargs
127) -> DataLoader:
128    """Get the OrgaSegment dataloader for organoid segmentation.
129
130    Args:
131        path: Filepath to a folder where the downloaded data will be saved.
132        batch_size: The batch size for training.
133        patch_shape: The patch shape to use for training.
134        split: The split to download. Either 'train', 'val' or 'eval'.
135        boundaries: Whether to compute boundaries as the target.
136        binary: Whether to use a binary segmentation target.
137        download: Whether to download the data if it is not present.
138        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
139
140    Returns:
141        The DataLoader.
142    """
143    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
144    dataset = get_orgasegment_dataset(path, patch_shape, split, boundaries, binary, download, **ds_kwargs)
145    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)

Get the OrgaSegment dataloader for organoid segmentation.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
batch_size: The batch size for training.
patch_shape: The patch shape to use for training.
split: The split to download. Either 'train', 'val' or 'eval'.
boundaries: Whether to compute boundaries as the target.
binary: Whether to use a binary segmentation target.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.