torch_em.data.datasets.light_microscopy.cartocell
The CartoCell dataset contains annotations of cell segmentation in whole epithelial cysts in high-content screening microscopy images.
The dataset is located at https://data.mendeley.com/datasets/7gbkxgngpm/2. This dataset is from the publication https://doi.org/10.1016/j.crmeth.2023.100597. Please cite it if you use this dataset for your research.
"""The CartoCell dataset contains annotations of cell segmentation in
whole epithelial cysts in high-content screening microscopy images.

The dataset is located at https://data.mendeley.com/datasets/7gbkxgngpm/2.
This dataset is from the publication https://doi.org/10.1016/j.crmeth.2023.100597.
Please cite it if you use this dataset for your research.
"""

import os
import shutil
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, Optional, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://prod-dcd-datasets-cache-zipfiles.s3.eu-west-1.amazonaws.com/7gbkxgngpm-2.zip"
CHECKSUM = "ca3fc289e7b67febfc03cdd55fd791078f7527820c8dbcee0b98d03d993bb6f5"
# Name of the top-level folder inside the archive; it is renamed to 'data' after extraction.
DNAME = "CartoCell, a high-content pipeline for accurate 3D image analysis, unveils cell morphology patterns in epithelial cysts"  # noqa


def get_cartocell_data(path: Union[os.PathLike, str], download: bool = False):
    """Download the CartoCell dataset.

    Nothing is done if the folder `<path>/data` already exists.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.
    """
    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        return

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "cartocell.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)
    # The archive extracts into a folder with a very long name; give it a short, stable one.
    shutil.move(src=os.path.join(path, DNAME), dst=data_dir)


def get_cartocell_paths(
    path: Union[os.PathLike, str],
    split: Optional[Literal["train", "test"]] = None,
    name: Optional[Literal["eggChambers", "embryoids", "MDCK-Normoxia", "MDCK-Hypoxia"]] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the CartoCell data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', or 'test'.
        name: The name of data subset. Either 'eggChambers', 'embryoids', 'MDCK-Normoxia' or 'MDCK-Hypoxia'.
            By default all subsets except 'MDCK-Hypoxia' are used.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `name` is 'MDCK-Hypoxia', which has mismatching image and label shapes.
        AssertionError: If no image files match the requested split and name.
    """
    get_cartocell_data(path, download)

    # NOTE(review): with an explicit `name` and `split=None` the folder pattern contains no split
    # token, so it only matches 'low-resolution_<name>_raw_images' - confirm such folders exist.
    split_token = "" if split is None else split + "_"

    if name is None:
        name_token = "*"
    elif name == "MDCK-Hypoxia":
        raise ValueError(f"'{name}' has mismatching shapes for image and corresponding labels.")
    else:
        name_token = name

    data_dir = os.path.join(path, "data")
    pattern = os.path.join(data_dir, f"low-resolution_{name_token}_{split_token}raw_images", "*")

    raw_paths, label_paths = [], []
    for rpath in natsorted(glob(pattern)):
        # Only inspect / rewrite the part below the data folder, so that a user-chosen base
        # path containing 'raw' or 'MDCK-Hypoxia' cannot corrupt the result.
        rel_path = os.path.relpath(rpath, data_dir)

        # NOTE: The 'MDCK-Hypoxia' inputs have mismatching input-label shapes (and axes seem interchanged)
        if "MDCK-Hypoxia" in rel_path:
            continue

        raw_paths.append(rpath)
        label_paths.append(os.path.join(data_dir, rel_path.replace("raw", "label")))

    # Raised explicitly (instead of `assert`) so the check survives `python -O`;
    # the exception type is unchanged for existing callers.
    if not raw_paths:
        raise AssertionError(f"No CartoCell images found for the pattern '{pattern}'.")

    return raw_paths, label_paths


def get_cartocell_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    split: Optional[Literal["train", "test"]] = None,
    name: Optional[Literal["eggChambers", "embryoids", "MDCK-Normoxia", "MDCK-Hypoxia"]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the CartoCell dataset for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        name: The name of data subset. Either 'eggChambers', 'embryoids', 'MDCK-Normoxia' or 'MDCK-Hypoxia'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_cartocell_paths(path, split, name, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )


def get_cartocell_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Optional[Literal["train", "test"]] = None,
    name: Optional[Literal["eggChambers", "embryoids", "MDCK-Normoxia", "MDCK-Hypoxia"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CartoCell dataloader for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        name: The name of data subset. Either 'eggChambers', 'embryoids', 'MDCK-Normoxia' or 'MDCK-Hypoxia'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Route each keyword argument either to the dataset factory or to the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_cartocell_dataset(path, patch_shape, split, name, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://prod-dcd-datasets-cache-zipfiles.s3.eu-west-1.amazonaws.com/7gbkxgngpm-2.zip'
CHECKSUM =
'ca3fc289e7b67febfc03cdd55fd791078f7527820c8dbcee0b98d03d993bb6f5'
DNAME =
'CartoCell, a high-content pipeline for accurate 3D image analysis, unveils cell morphology patterns in epithelial cysts'
def
get_cartocell_data(path: Union[os.PathLike, str], download: bool = False):
def get_cartocell_data(path: Union[os.PathLike, str], download: bool = False):
    """Download the CartoCell dataset.

    Nothing is done if the folder `<path>/data` already exists. Otherwise the
    archive is fetched to `<path>/cartocell.zip`, extracted into `path`, and the
    extracted top-level folder is renamed to `<path>/data`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.
    """
    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        # Data was already prepared by an earlier call.
        return

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "cartocell.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)
    # The archive extracts into a folder with a very long name; give it a short, stable one.
    shutil.move(src=os.path.join(path, DNAME), dst=data_dir)
Download the CartoCell dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
def
get_cartocell_paths( path: Union[os.PathLike, str], split: Optional[Literal['train', 'test']] = None, name: Optional[Literal['eggChambers', 'embryoids', 'MDCK-Normoxia', 'MDCK-Hypoxia']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_cartocell_paths(
    path: Union[os.PathLike, str],
    split: Optional[Literal["train", "test"]] = None,
    name: Optional[Literal["eggChambers", "embryoids", "MDCK-Normoxia", "MDCK-Hypoxia"]] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the CartoCell data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', or 'test'.
        name: The name of data subset. Either 'eggChambers', 'embryoids', 'MDCK-Normoxia' or 'MDCK-Hypoxia'.
            By default all subsets except 'MDCK-Hypoxia' are used.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `name` is 'MDCK-Hypoxia', which has mismatching image and label shapes.
        AssertionError: If no image files match the requested split and name.
    """
    get_cartocell_data(path, download)

    # NOTE(review): with an explicit `name` and `split=None` the folder pattern contains no split
    # token, so it only matches 'low-resolution_<name>_raw_images' - confirm such folders exist.
    split_token = "" if split is None else split + "_"

    if name is None:
        name_token = "*"
    elif name == "MDCK-Hypoxia":
        raise ValueError(f"'{name}' has mismatching shapes for image and corresponding labels.")
    else:
        name_token = name

    data_dir = os.path.join(path, "data")
    pattern = os.path.join(data_dir, f"low-resolution_{name_token}_{split_token}raw_images", "*")

    raw_paths, label_paths = [], []
    for rpath in natsorted(glob(pattern)):
        # Only inspect / rewrite the part below the data folder, so that a user-chosen base
        # path containing 'raw' or 'MDCK-Hypoxia' cannot corrupt the result.
        rel_path = os.path.relpath(rpath, data_dir)

        # NOTE: The 'MDCK-Hypoxia' inputs have mismatching input-label shapes (and axes seem interchanged)
        if "MDCK-Hypoxia" in rel_path:
            continue

        raw_paths.append(rpath)
        label_paths.append(os.path.join(data_dir, rel_path.replace("raw", "label")))

    # Raised explicitly (instead of `assert`) so the check survives `python -O`;
    # the exception type is unchanged for existing callers.
    if not raw_paths:
        raise AssertionError(f"No CartoCell images found for the pattern '{pattern}'.")

    return raw_paths, label_paths
Get paths to the CartoCell data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split to use. Either 'train', or 'test'.
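- name: The name of the data subset. Either 'eggChambers', 'embryoids', 'MDCK-Normoxia' or 'MDCK-Hypoxia'. By default all subsets except 'MDCK-Hypoxia' are used; passing 'MDCK-Hypoxia' raises a ValueError because its images and labels have mismatching shapes.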
- download: Whether to download the data if it is not present.
Returns:
A tuple of two lists: the filepaths for the image data and the filepaths for the corresponding label data.
def
get_cartocell_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], split: Optional[Literal['train', 'test']] = None, name: Optional[Literal['eggChambers', 'embryoids', 'MDCK-Normoxia', 'MDCK-Hypoxia']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_cartocell_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    split: Optional[Literal["train", "test"]] = None,
    name: Optional[Literal["eggChambers", "embryoids", "MDCK-Normoxia", "MDCK-Hypoxia"]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the CartoCell dataset for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        name: The name of data subset. Either 'eggChambers', 'embryoids', 'MDCK-Normoxia' or 'MDCK-Hypoxia'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_cartocell_paths(path, split, name, download)

    # `raw_key` / `label_key` are None: each path is passed as a whole file, without an internal key.
    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )
Get the CartoCell dataset for cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train', or 'test'.
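- name: The name of the data subset. Either 'eggChambers', 'embryoids', 'MDCK-Normoxia' or 'MDCK-Hypoxia'. By default all subsets except 'MDCK-Hypoxia' are used; passing 'MDCK-Hypoxia' raises a ValueError because its images and labels have mismatching shapes.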
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_cartocell_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], split: Optional[Literal['train', 'test']] = None, name: Optional[Literal['eggChambers', 'embryoids', 'MDCK-Normoxia', 'MDCK-Hypoxia']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_cartocell_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Optional[Literal["train", "test"]] = None,
    name: Optional[Literal["eggChambers", "embryoids", "MDCK-Normoxia", "MDCK-Hypoxia"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CartoCell dataloader for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        name: The name of data subset. Either 'eggChambers', 'embryoids', 'MDCK-Normoxia' or 'MDCK-Hypoxia'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Route each keyword argument either to the dataset factory or to the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_cartocell_dataset(path, patch_shape, split, name, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
Get the CartoCell dataloader for cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train', or 'test'.
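- name: The name of the data subset. Either 'eggChambers', 'embryoids', 'MDCK-Normoxia' or 'MDCK-Hypoxia'. By default all subsets except 'MDCK-Hypoxia' are used; passing 'MDCK-Hypoxia' raises a ValueError because its images and labels have mismatching shapes.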
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.