torch_em.data.datasets.light_microscopy.spheroids_hepg2

The HepG2 Spheroids dataset contains 3D confocal fluorescence microscopy images of twelve densely packed HepG2 human carcinoma cell nuclei spheroids, with manually annotated instance segmentation ground truth created using 3D Slicer.

Original image dimensions are 1024 x 1024 pixels (XY) with 1.01 µm z-step size.

The dataset is located at https://doi.org/10.6084/m9.figshare.16438314. This dataset is from the publication https://doi.org/10.1186/s12859-022-04827-3. Please cite it if you use this dataset in your research.

  1"""The HepG2 Spheroids dataset contains 3D confocal fluorescence microscopy images
  2of twelve densely packed HepG2 human carcinoma cell nuclei spheroids, with manually
  3annotated instance segmentation ground truth created using 3D Slicer.
  4
  5Original image dimensions are 1024 x 1024 pixels (XY) with 1.01 µm z-step size.
  6
  7The dataset is located at https://doi.org/10.6084/m9.figshare.16438314.
  8This dataset is from the publication https://doi.org/10.1186/s12859-022-04827-3.
  9Please cite it if you use this dataset in your research.
 10"""
 11
 12import os
 13from glob import glob
 14from natsort import natsorted
 15from typing import List, Tuple, Union
 16
 17from torch.utils.data import Dataset, DataLoader
 18
 19import torch_em
 20
 21from .. import util
 22
 23
# Download link for the dataset archive; it is stored locally as "12spheroids.zip".
URL = "https://ndownloader.figshare.com/files/30449889"
# Forwarded as `checksum` to `util.download_source`.
# NOTE(review): None presumably disables checksum verification - confirm against `util.download_source`.
CHECKSUM = None
 26
 27
 28def get_spheroids_hepg2_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 29    """Download the HepG2 Spheroids dataset.
 30
 31    Args:
 32        path: Filepath to a folder where the downloaded data will be saved.
 33        download: Whether to download the data if it is not present.
 34
 35    Returns:
 36        The filepath to the extracted data directory.
 37    """
 38    # The zip extracts GT/, spheroids/, and seeds/ directly into path.
 39    if os.path.exists(os.path.join(path, "GT")):
 40        return path
 41
 42    os.makedirs(path, exist_ok=True)
 43    zip_path = os.path.join(path, "12spheroids.zip")
 44    util.download_source(zip_path, URL, download, checksum=CHECKSUM)
 45    util.unzip(zip_path, path)
 46
 47    return path
 48
 49
 50def get_spheroids_hepg2_paths(
 51    path: Union[os.PathLike, str], download: bool = False,
 52) -> Tuple[List[str], List[str]]:
 53    """Get paths to the HepG2 Spheroids data.
 54
 55    Args:
 56        path: Filepath to a folder where the downloaded data will be saved.
 57        download: Whether to download the data if it is not present.
 58
 59    Returns:
 60        List of filepaths for the image data.
 61        List of filepaths for the label data.
 62    """
 63    data_dir = get_spheroids_hepg2_data(path, download)
 64
 65    # Raw: spheroids/{N}_smoothed_spheroid.nrrd (exclude _expanded_3 variants)
 66    raw_paths = natsorted([
 67        p for p in glob(os.path.join(data_dir, "spheroids", "*.nrrd"))
 68        if "expanded" not in os.path.basename(p)
 69    ])
 70    # Labels: GT/{N}_GT.nrrd (exclude _expanded_3_DT variants)
 71    label_paths = natsorted([
 72        p for p in glob(os.path.join(data_dir, "GT", "*.nrrd"))
 73        if "expanded" not in os.path.basename(p)
 74    ])
 75
 76    if len(raw_paths) == 0:
 77        raise RuntimeError(
 78            f"No image files found in {os.path.join(data_dir, 'spheroids')}. "
 79            "Please check the dataset structure after downloading."
 80        )
 81    if len(raw_paths) != len(label_paths):
 82        raise RuntimeError(
 83            f"Number of images ({len(raw_paths)}) and labels ({len(label_paths)}) do not match."
 84        )
 85
 86    return raw_paths, label_paths
 87
 88
 89def get_spheroids_hepg2_dataset(
 90    path: Union[os.PathLike, str],
 91    patch_shape: Tuple[int, ...],
 92    download: bool = False,
 93    **kwargs,
 94) -> Dataset:
 95    """Get the HepG2 Spheroids dataset for 3D nucleus instance segmentation.
 96
 97    Args:
 98        path: Filepath to a folder where the downloaded data will be saved.
 99        patch_shape: The patch shape to use for training.
100        download: Whether to download the data if it is not present.
101        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
102
103    Returns:
104        The segmentation dataset.
105    """
106    raw_paths, label_paths = get_spheroids_hepg2_paths(path, download)
107
108    return torch_em.default_segmentation_dataset(
109        raw_paths=raw_paths,
110        raw_key=None,
111        label_paths=label_paths,
112        label_key=None,
113        patch_shape=patch_shape,
114        **kwargs,
115    )
116
117
def get_spheroids_hepg2_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the HepG2 Spheroids dataloader for 3D nucleus instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones,
    # which are forwarded to the data loader.
    split = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset_kwargs, dataloader_kwargs = split

    dataset = get_spheroids_hepg2_dataset(path, patch_shape, download, **dataset_kwargs)
    loader = torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)
    return loader
URL = 'https://ndownloader.figshare.com/files/30449889'
CHECKSUM = None
def get_spheroids_hepg2_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_spheroids_hepg2_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the HepG2 Spheroids data is available in the given folder.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the extracted data directory.
    """
    # The archive unpacks GT/, spheroids/ and seeds/ straight into `path`,
    # so an existing GT folder is taken as the sign that nothing is left to do.
    already_extracted = os.path.exists(os.path.join(path, "GT"))

    if not already_extracted:
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "12spheroids.zip")
        util.download_source(archive, URL, download, checksum=CHECKSUM)
        util.unzip(archive, path)

    return path

Download the HepG2 Spheroids dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the extracted data directory.

def get_spheroids_hepg2_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
def get_spheroids_hepg2_paths(
    path: Union[os.PathLike, str], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Collect the image and label filepaths of the HepG2 Spheroids data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        RuntimeError: If no images are found, or if the number of images and labels differs.
    """
    data_dir = get_spheroids_hepg2_data(path, download)

    def _list_volumes(folder):
        # All .nrrd files of one sub-folder in natural order, without the "expanded" variants.
        selected = []
        for candidate in glob(os.path.join(data_dir, folder, "*.nrrd")):
            if "expanded" in os.path.basename(candidate):
                continue
            selected.append(candidate)
        return natsorted(selected)

    # Raw: spheroids/{N}_smoothed_spheroid.nrrd (the _expanded_3 variants are skipped).
    raw_paths = _list_volumes("spheroids")
    # Labels: GT/{N}_GT.nrrd (the _expanded_3_DT variants are skipped).
    label_paths = _list_volumes("GT")

    if not raw_paths:
        raise RuntimeError(
            f"No image files found in {os.path.join(data_dir, 'spheroids')}. "
            "Please check the dataset structure after downloading."
        )

    if len(label_paths) != len(raw_paths):
        raise RuntimeError(
            f"Number of images ({len(raw_paths)}) and labels ({len(label_paths)}) do not match."
        )

    return raw_paths, label_paths

Get paths to the HepG2 Spheroids data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

A tuple of two lists: the filepaths for the image data and the filepaths for the label data.

def get_spheroids_hepg2_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 90def get_spheroids_hepg2_dataset(
 91    path: Union[os.PathLike, str],
 92    patch_shape: Tuple[int, ...],
 93    download: bool = False,
 94    **kwargs,
 95) -> Dataset:
 96    """Get the HepG2 Spheroids dataset for 3D nucleus instance segmentation.
 97
 98    Args:
 99        path: Filepath to a folder where the downloaded data will be saved.
100        patch_shape: The patch shape to use for training.
101        download: Whether to download the data if it is not present.
102        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
103
104    Returns:
105        The segmentation dataset.
106    """
107    raw_paths, label_paths = get_spheroids_hepg2_paths(path, download)
108
109    return torch_em.default_segmentation_dataset(
110        raw_paths=raw_paths,
111        raw_key=None,
112        label_paths=label_paths,
113        label_key=None,
114        patch_shape=patch_shape,
115        **kwargs,
116    )

Get the HepG2 Spheroids dataset for 3D nucleus instance segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_spheroids_hepg2_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_spheroids_hepg2_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the HepG2 Spheroids dataloader for 3D nucleus instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones,
    # which are forwarded to the data loader.
    split = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset_kwargs, dataloader_kwargs = split

    dataset = get_spheroids_hepg2_dataset(path, patch_shape, download, **dataset_kwargs)
    loader = torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)
    return loader

Get the HepG2 Spheroids dataloader for 3D nucleus instance segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.