torch_em.data.datasets.light_microscopy.spheroids_hepg2
The HepG2 Spheroids dataset contains 3D confocal fluorescence microscopy images of twelve densely packed HepG2 human carcinoma cell nuclei spheroids, with manually annotated instance segmentation ground truth created using 3D Slicer.
Original image dimensions are 1024 x 1024 pixels (XY) with 1.01 µm z-step size.
The dataset is located at https://doi.org/10.6084/m9.figshare.16438314. This dataset is from the publication https://doi.org/10.1186/s12859-022-04827-3. Please cite it if you use this dataset in your research.
"""The HepG2 Spheroids dataset contains 3D confocal fluorescence microscopy images
of twelve densely packed HepG2 human carcinoma cell nuclei spheroids, with manually
annotated instance segmentation ground truth created using 3D Slicer.

Original image dimensions are 1024 x 1024 pixels (XY) with 1.01 µm z-step size.

The dataset is located at https://doi.org/10.6084/m9.figshare.16438314.
This dataset is from the publication https://doi.org/10.1186/s12859-022-04827-3.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import List, Tuple, Union

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://ndownloader.figshare.com/files/30449889"
# NOTE(review): no checksum is configured; presumably the archive is then not
# verified after download - confirm against `util.download_source`.
CHECKSUM = None


def get_spheroids_hepg2_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the HepG2 Spheroids dataset.

    The archive is fetched from `URL` and unpacked into `path`. Nothing is
    downloaded when the extracted `GT` and `spheroids` folders already exist.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the extracted data directory, as a string.
    """
    # Normalize path-like inputs so the return value honors the `-> str` annotation.
    path = os.fspath(path)

    # The zip extracts GT/, spheroids/, and seeds/ directly into path.
    # Both folders read by `get_spheroids_hepg2_paths` must be present; checking
    # only GT/ would treat a partial extraction as complete.
    if all(os.path.exists(os.path.join(path, name)) for name in ("GT", "spheroids")):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "12spheroids.zip")
    util.download_source(zip_path, URL, download, checksum=CHECKSUM)
    util.unzip(zip_path, path)

    return path


def get_spheroids_hepg2_paths(
    path: Union[os.PathLike, str], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the HepG2 Spheroids data.

    Images and labels are each returned in natural sort order, so entries at the
    same index are expected to belong together.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        RuntimeError: If no image files are found, or if the number of image
            and label files differs.
    """
    data_dir = get_spheroids_hepg2_data(path, download)

    # Raw: spheroids/{N}_smoothed_spheroid.nrrd (exclude _expanded_3 variants)
    raw_paths = natsorted([
        p for p in glob(os.path.join(data_dir, "spheroids", "*.nrrd"))
        if "expanded" not in os.path.basename(p)
    ])
    # Labels: GT/{N}_GT.nrrd (exclude _expanded_3_DT variants)
    label_paths = natsorted([
        p for p in glob(os.path.join(data_dir, "GT", "*.nrrd"))
        if "expanded" not in os.path.basename(p)
    ])

    if not raw_paths:
        raise RuntimeError(
            f"No image files found in {os.path.join(data_dir, 'spheroids')}. "
            "Please check the dataset structure after downloading."
        )
    if len(raw_paths) != len(label_paths):
        raise RuntimeError(
            f"Number of images ({len(raw_paths)}) and labels ({len(label_paths)}) do not match."
        )

    return raw_paths, label_paths


def get_spheroids_hepg2_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Get the HepG2 Spheroids dataset for 3D nucleus instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_spheroids_hepg2_paths(path, download)

    # The volumes are addressed by file path alone, hence no internal keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs,
    )


def get_spheroids_hepg2_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the HepG2 Spheroids dataloader for 3D nucleus instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_spheroids_hepg2_dataset(path, patch_shape, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def get_spheroids_hepg2_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the HepG2 Spheroids dataset.

    The archive is fetched from `URL` and unpacked into `path`. Nothing is
    downloaded when the extracted `GT` and `spheroids` folders already exist.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the extracted data directory, as a string.
    """
    # Normalize path-like inputs so the return value honors the `-> str` annotation.
    path = os.fspath(path)

    # The zip extracts GT/, spheroids/, and seeds/ directly into path.
    # Both the label folder and the image folder must be present; checking
    # only GT/ would treat a partial extraction as complete.
    if all(os.path.exists(os.path.join(path, name)) for name in ("GT", "spheroids")):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "12spheroids.zip")
    util.download_source(zip_path, URL, download, checksum=CHECKSUM)
    util.unzip(zip_path, path)

    return path
Download the HepG2 Spheroids dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the extracted data directory.
def get_spheroids_hepg2_paths(
    path: Union[os.PathLike, str], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Collect the image and label volumes of the HepG2 Spheroids data.

    Both lists are returned in natural sort order, so entries at the same
    index are expected to belong together.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        RuntimeError: If no image files are found, or if the number of image
            and label files differs.
    """
    root = get_spheroids_hepg2_data(path, download)

    def _original_volumes(folder: str) -> List[str]:
        # Files with "expanded" in their name are derived variants and are left out.
        selected = []
        for candidate in glob(os.path.join(root, folder, "*.nrrd")):
            if "expanded" in os.path.basename(candidate):
                continue
            selected.append(candidate)
        return natsorted(selected)

    # Raw: spheroids/{N}_smoothed_spheroid.nrrd (without the _expanded_3 variants)
    image_files = _original_volumes("spheroids")
    # Labels: GT/{N}_GT.nrrd (without the _expanded_3_DT variants)
    label_files = _original_volumes("GT")

    if not image_files:
        image_dir = os.path.join(root, "spheroids")
        raise RuntimeError(
            f"No image files found in {image_dir}. "
            "Please check the dataset structure after downloading."
        )

    n_images, n_labels = len(image_files), len(label_files)
    if n_images != n_labels:
        raise RuntimeError(f"Number of images ({n_images}) and labels ({n_labels}) do not match.")

    return image_files, label_files
Get paths to the HepG2 Spheroids data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
A list of filepaths for the image data, followed by a list of filepaths for the label data.
def get_spheroids_hepg2_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Build the HepG2 Spheroids dataset for 3D nucleus instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_files, label_files = get_spheroids_hepg2_paths(path, download)

    # The volumes are addressed by file path alone, hence no internal keys.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_files,
        raw_key=None,
        label_paths=label_files,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs,
    )
    return dataset
Get the HepG2 Spheroids dataset for 3D nucleus instance segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
def get_spheroids_hepg2_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the HepG2 Spheroids dataloader for 3D nucleus instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    return torch_em.get_data_loader(
        get_spheroids_hepg2_dataset(path, patch_shape, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )
Get the HepG2 Spheroids dataloader for 3D nucleus instance segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.