torch_em.data.datasets.light_microscopy.dic_hepg2
This dataset contains annotations for cell segmentation in differential interference contrast (DIC) microscopy images.
This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2024.109151. Please cite it if you use this dataset in your research.
"""This dataset contains annotations for cell segmentation in
differential interference contrast (DIC) microscopy images.

This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2024.109151.
Please cite it if you use this dataset in your research.
"""

import os
from tqdm import tqdm
from glob import glob
from pathlib import Path
from natsort import natsorted
from typing import Union, Literal, Tuple, Optional, List

import imageio.v3 as imageio

from torch.utils.data import Dataset, DataLoader

import torch_em

try:
    from pycocotools.coco import COCO
except ImportError:
    # pycocotools is optional at import time; it is only required once the
    # COCO annotations have to be converted into label images.
    COCO = None

from .. import util
from .livecell import _annotations_to_instances


URL = "https://zenodo.org/records/13120679/files/2021-11-15_HepG2_Calcein_AM.zip"
CHECKSUM = "42b939d01c5fc2517dc3ad34bde596ac38dbeba2a96173f37e1b6dfe14cbe3a2"


def get_dic_hepg2_data(path: Union[str, os.PathLike], download: bool = False) -> str:
    """Download the DIC HepG2 dataset.

    The download is skipped when the extracted data folder
    `2021-11-15_HepG2_Calcein_AM` is already present inside `path`.

    Args:
        path: Filepath to a folder where the downloaded data will be stored.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the folder where data is stored.
    """
    # Look for the extracted folder instead of `path` itself: a target folder
    # that was merely created in advance must not be mistaken for downloaded data.
    data_dir = os.path.join(path, "2021-11-15_HepG2_Calcein_AM")
    if os.path.exists(data_dir):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "2021-11-15_HepG2_Calcein_AM.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    # NOTE(review): the third positional argument is presumably `remove=True`
    # (delete the archive after extraction) - confirm against `util.unzip`.
    util.unzip(zip_path, path, True)

    return path


def _create_segmentations_from_coco_annotation(path, split):
    """Convert the COCO-style annotations of one split into per-image label files.

    The label images are written as tif files into an `annotations` folder next to
    the `images` folder of the split. If that folder already exists the conversion
    is skipped.

    Args:
        path: The folder that contains the extracted `2021-11-15_HepG2_Calcein_AM` data.
        split: The name of the split sub-folder inside `coco_format`.

    Returns:
        The folder with the images of the split.
        The folder with the label images of the split.
    """
    assert COCO is not None, "pycocotools is required for processing the DIC HepG2 ground-truth."

    base_dir = os.path.join(path, "2021-11-15_HepG2_Calcein_AM", "coco_format", split)
    image_folder = os.path.join(base_dir, "images")
    gt_folder = os.path.join(base_dir, "annotations")
    # NOTE(review): an existing folder is treated as a finished conversion, so an
    # interrupted earlier run would leave an incomplete set of labels behind.
    if os.path.exists(gt_folder):
        return image_folder, gt_folder

    os.makedirs(gt_folder, exist_ok=True)

    ann_file = os.path.join(base_dir, "annotations.json")
    assert os.path.exists(ann_file), f"Could not find the annotation file at {ann_file}."
    coco = COCO(ann_file)
    category_ids = coco.getCatIds(catNms=["cell"])
    image_ids = coco.getImgIds(catIds=category_ids)

    for image_id in tqdm(
        image_ids, desc="Creating DIC HepG2 segmentations from coco-style annotations"
    ):
        image_metadata = coco.loadImgs(image_id)[0]
        fname = image_metadata["file_name"]

        # The label image keeps the file name of its image, with a tif suffix.
        gt_path = os.path.join(gt_folder, Path(fname).with_suffix(".tif"))

        gt = _annotations_to_instances(coco, image_metadata, category_ids)
        imageio.imwrite(gt_path, gt, compression="zlib")

    return image_folder, gt_folder


def get_dic_hepg2_paths(
    path: Union[os.PathLike, str], split: str, download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to DIC HepG2 data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    path = get_dic_hepg2_data(path=path, download=download)

    image_folder, gt_folder = _create_segmentations_from_coco_annotation(path=path, split=split)
    gt_paths = natsorted(glob(os.path.join(gt_folder, "*.tif")))
    # The image list is derived from the label list, so both lists are aligned.
    image_paths = [os.path.join(image_folder, f"{Path(gt_path).stem}.png") for gt_path in gt_paths]

    return image_paths, gt_paths


def get_dic_hepg2_dataset(
    path: Union[str, os.PathLike],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the DIC HepG2 dataset for segmenting cells in differential interference contrast microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # Forward `download`; otherwise the flag of this function would have no effect.
    image_paths, gt_paths = get_dic_hepg2_paths(path=path, split=split, download=download)

    kwargs = util.ensure_transforms(ndim=2, **kwargs)
    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, offsets=offsets, boundaries=boundaries, binary=binary
    )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )


def get_dic_hepg2_loader(
    path: Union[str, os.PathLike],
    split: Literal['train', 'val', 'test'],
    patch_shape: Tuple[int, int],
    batch_size: int,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the DIC HepG2 dataloader for segmenting cells in differential interference contrast microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', 'val' or 'test'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Keyword arguments that the dataset factory does not accept go to the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_dic_hepg2_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        download=download,
        **ds_kwargs
    )
    return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
URL =
'https://zenodo.org/records/13120679/files/2021-11-15_HepG2_Calcein_AM.zip'
CHECKSUM =
'42b939d01c5fc2517dc3ad34bde596ac38dbeba2a96173f37e1b6dfe14cbe3a2'
def
get_dic_hepg2_data(path: Union[str, os.PathLike], download: bool = False) -> str:
def get_dic_hepg2_data(path: Union[str, os.PathLike], download: bool = False) -> str:
    """Download the DIC HepG2 dataset.

    The download is skipped when the extracted data folder
    `2021-11-15_HepG2_Calcein_AM` is already present inside `path`.

    Args:
        path: Filepath to a folder where the downloaded data will be stored.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the folder where data is stored.
    """
    # Look for the extracted folder instead of `path` itself: a target folder
    # that was merely created in advance must not be mistaken for downloaded data.
    data_dir = os.path.join(path, "2021-11-15_HepG2_Calcein_AM")
    if os.path.exists(data_dir):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "2021-11-15_HepG2_Calcein_AM.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    # NOTE(review): the third positional argument is presumably `remove=True`
    # (delete the archive after extraction) - confirm against `util.unzip`.
    util.unzip(zip_path, path, True)

    return path
Download the DIC HepG2 dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be stored.
- download: Whether to download the data if it is not present.
Returns:
The path to the folder where data is stored.
def
get_dic_hepg2_paths( path: Union[os.PathLike, str], split: str, download: bool = False) -> Tuple[List[str], List[str]]:
def get_dic_hepg2_paths(
    path: Union[os.PathLike, str], split: str, download: bool = False
) -> Tuple[List[str], List[str]]:
    """Collect the image and label filepaths of one split of the DIC HepG2 data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_root = get_dic_hepg2_data(path=path, download=download)
    image_folder, gt_folder = _create_segmentations_from_coco_annotation(path=data_root, split=split)

    # The label files are listed in natural sort order.
    label_pattern = os.path.join(gt_folder, "*.tif")
    gt_paths = natsorted(glob(label_pattern))

    # Each image path is built from the stem of its label file, so that both
    # lists have the same length and the same order.
    image_paths = []
    for label_file in gt_paths:
        image_name = f"{Path(label_file).stem}.png"
        image_paths.append(os.path.join(image_folder, image_name))

    return image_paths, gt_paths
Get paths to DIC HepG2 data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split to use. Either 'train', 'val' or 'test'.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_dic_hepg2_dataset( path: Union[str, os.PathLike], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_dic_hepg2_dataset(
    path: Union[str, os.PathLike],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the DIC HepG2 dataset for segmenting cells in differential interference contrast microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # Forward `download`; otherwise the flag of this function would have no effect.
    image_paths, gt_paths = get_dic_hepg2_paths(path=path, split=split, download=download)

    kwargs = util.ensure_transforms(ndim=2, **kwargs)
    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, offsets=offsets, boundaries=boundaries, binary=binary
    )

    # Images and labels are separate files, hence no internal keys are passed.
    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
Get the DIC HepG2 dataset for segmenting cells in differential interference contrast microscopy.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split to use. Either 'train', 'val' or 'test'.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_dic_hepg2_loader( path: Union[str, os.PathLike], split: Literal['train', 'val', 'test'], patch_shape: Tuple[int, int], batch_size: int, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_dic_hepg2_loader(
    path: Union[str, os.PathLike],
    split: Literal['train', 'val', 'test'],
    patch_shape: Tuple[int, int],
    batch_size: int,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the DIC HepG2 dataloader for cell segmentation in differential interference contrast microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', 'val' or 'test'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the extra keyword arguments into those for the dataset factory
    # and the remaining ones, which are handed to the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset_settings = dict(
        path=path,
        patch_shape=patch_shape,
        split=split,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        download=download,
    )
    dataset = get_dic_hepg2_dataset(**dataset_settings, **ds_kwargs)

    loader = torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
    return loader
Get the DIC HepG2 dataloader for segmenting cells in differential interference contrast microscopy.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split to use. Either 'train', 'val' or 'test'.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.