torch_em.data.datasets.light_microscopy.organoidnet
The OrganoIDNet dataset contains annotations of pancreatic organoids.
This dataset is from the publication https://doi.org/10.1007/s13402-024-00958-2. Please cite it if you use this dataset for a publication.
"""The OrganoIDNet dataset contains annotations of pancreatic organoids.

This dataset is from the publication https://doi.org/10.1007/s13402-024-00958-2.
Please cite it if you use this dataset for a publication.
"""


import os
import shutil
import zipfile
from glob import glob
from typing import Tuple, Union, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://zenodo.org/records/10643410/files/OrganoIDNetData.zip?download=1"
CHECKSUM = "3cd9239bf74bda096ecb5b7bdb95f800c7fa30b9937f9aba6ddf98d754cbfa3d"


def get_organoidnet_data(path: Union[os.PathLike, str], split: str, download: bool = False) -> str:
    """Download the OrganoIDNet dataset.

    Args:
        path: Filepath to the folder where the downloaded data will be saved.
        split: The data split to use. One of "Training", "Validation" or "Test".
        download: Whether to download the data if it is not present.

    Returns:
        The filepath where the data is downloaded.

    Raises:
        ValueError: If `split` is not one of the supported splits.
        RuntimeError: If the folder of the requested split is missing after extraction.
    """
    splits = ["Training", "Validation", "Test"]
    # Explicit check instead of `assert`, so the validation also runs under `python -O`.
    if split not in splits:
        raise ValueError(f"'{split}' is not a valid split. Choose one of {splits}.")

    os.makedirs(path, exist_ok=True)

    data_dir = os.path.join(path, split)
    if os.path.exists(data_dir):
        return data_dir

    # Download and extraction.
    zip_path = os.path.join(path, "OrganoIDNetData.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)

    # Only "Training", "Test", "Validation" from the zip are relevant and need to be extracted.
    # They are in "/OrganoIDNetData/Dataset/"
    prefix = "OrganoIDNetData/Dataset/"
    split_prefixes = tuple(prefix + dl_split for dl_split in splits)

    # Open the archive once and select the members of all splits in a single pass.
    with zipfile.ZipFile(zip_path) as archive:
        members = [name for name in archive.namelist() if name.startswith(split_prefixes)]
        archive.extractall(path, members=members)

    for dl_split in splits:
        target = os.path.join(path, dl_split)
        # `shutil.move` would place the source *inside* an already existing target folder
        # (e.g. left over from an interrupted run), so such splits are skipped.
        if not os.path.exists(target):
            shutil.move(os.path.join(path, "OrganoIDNetData/Dataset", dl_split), target)

    if not os.path.exists(data_dir):
        raise RuntimeError(f"The data for the split '{split}' was not found at '{data_dir}' after extraction.")

    # NOTE: The zip archive is kept next to the extracted data; its removal is currently disabled.
    # os.remove(zip_path)
    return data_dir


def get_organoidnet_paths(
    path: Union[os.PathLike, str], split: str, download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the OrganoIDNet data.

    Args:
        path: Filepath to the folder where the downloaded data will be saved.
        split: The data split to use.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_organoidnet_data(path=path, split=split, download=download)

    image_paths = sorted(glob(os.path.join(data_dir, "Images", "*.tif")))
    label_paths = sorted(glob(os.path.join(data_dir, "Masks", "*.tif")))

    return image_paths, label_paths


def get_organoidnet_dataset(
    path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], download: bool = False, **kwargs
) -> Dataset:
    """Get the OrganoIDNet dataset for organoid segmentation in microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, label_paths = get_organoidnet_paths(path, split, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )


def get_organoidnet_loader(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the OrganoIDNet dataloader for organoid segmentation in microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_organoidnet_dataset(
        path=path, split=split, patch_shape=patch_shape, download=download, **ds_kwargs
    )
    return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
URL =
'https://zenodo.org/records/10643410/files/OrganoIDNetData.zip?download=1'
CHECKSUM =
'3cd9239bf74bda096ecb5b7bdb95f800c7fa30b9937f9aba6ddf98d754cbfa3d'
def
get_organoidnet_data(path: Union[os.PathLike, str], split: str, download: bool = False) -> str:
def get_organoidnet_data(path: Union[os.PathLike, str], split: str, download: bool = False) -> str:
    """Download the OrganoIDNet dataset.

    Args:
        path: Filepath to the folder where the downloaded data will be saved.
        split: The data split to use. One of "Training", "Validation" or "Test".
        download: Whether to download the data if it is not present.

    Returns:
        The filepath where the data is downloaded.

    Raises:
        ValueError: If `split` is not one of the supported splits.
        RuntimeError: If the folder of the requested split is missing after extraction.
    """
    splits = ["Training", "Validation", "Test"]
    # Explicit check instead of `assert`, so the validation also runs under `python -O`.
    if split not in splits:
        raise ValueError(f"'{split}' is not a valid split. Choose one of {splits}.")

    os.makedirs(path, exist_ok=True)

    data_dir = os.path.join(path, split)
    if os.path.exists(data_dir):
        return data_dir

    # Download and extraction.
    zip_path = os.path.join(path, "OrganoIDNetData.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)

    # Only "Training", "Test", "Validation" from the zip are relevant and need to be extracted.
    # They are in "/OrganoIDNetData/Dataset/"
    prefix = "OrganoIDNetData/Dataset/"
    split_prefixes = tuple(prefix + dl_split for dl_split in splits)

    # Open the archive once and select the members of all splits in a single pass.
    with zipfile.ZipFile(zip_path) as archive:
        members = [name for name in archive.namelist() if name.startswith(split_prefixes)]
        archive.extractall(path, members=members)

    for dl_split in splits:
        target = os.path.join(path, dl_split)
        # `shutil.move` would place the source *inside* an already existing target folder
        # (e.g. left over from an interrupted run), so such splits are skipped.
        if not os.path.exists(target):
            shutil.move(os.path.join(path, "OrganoIDNetData/Dataset", dl_split), target)

    if not os.path.exists(data_dir):
        raise RuntimeError(f"The data for the split '{split}' was not found at '{data_dir}' after extraction.")

    # NOTE: The zip archive is kept next to the extracted data; its removal is currently disabled.
    # os.remove(zip_path)
    return data_dir
Download the OrganoIDNet dataset.
Arguments:
- path: Filepath to the folder where the downloaded data will be saved.
- split: The data split to use.
- download: Whether to download the data if it is not present.
Returns:
The filepath where the data is downloaded.
def
get_organoidnet_paths( path: Union[os.PathLike, str], split: str, download: bool = False) -> Tuple[List[str], List[str]]:
def get_organoidnet_paths(
    path: Union[os.PathLike, str], split: str, download: bool = False
) -> Tuple[List[str], List[str]]:
    """Collect the image and mask file paths of one OrganoIDNet split.

    Args:
        path: Filepath to the folder where the downloaded data will be saved.
        split: The data split to use.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    split_root = get_organoidnet_data(path=path, split=split, download=download)

    def _sorted_tifs(subfolder):
        # All tif files of the given sub-folder of the split, in sorted order.
        pattern = os.path.join(split_root, subfolder, "*.tif")
        return sorted(glob(pattern))

    return _sorted_tifs("Images"), _sorted_tifs("Masks")
Get paths to the OrganoIDNet data.
Arguments:
- path: Filepath to the folder where the downloaded data will be saved.
- split: The data split to use.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_organoidnet_dataset( path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_organoidnet_dataset(
    path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], download: bool = False, **kwargs
) -> Dataset:
    """Build the OrganoIDNet dataset for organoid segmentation in microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_files, mask_files = get_organoidnet_paths(path=path, split=split, download=download)

    # The keys are None because the data is passed as tif file paths.
    organoid_dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_files,
        label_paths=mask_files,
        raw_key=None,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return organoid_dataset
Get the OrganoIDNet dataset for organoid segmentation in microscopy images.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split to use.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_organoidnet_loader( path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], batch_size: int, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_organoidnet_loader(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the OrganoIDNet dataloader for organoid segmentation in microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Divide the extra arguments between the dataset factory and the data loader.
    dataset_options, loader_options = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    organoid_dataset = get_organoidnet_dataset(
        path=path,
        split=split,
        patch_shape=patch_shape,
        download=download,
        **dataset_options
    )

    return torch_em.get_data_loader(dataset=organoid_dataset, batch_size=batch_size, **loader_options)
Get the OrganoIDNet dataloader for organoid segmentation in microscopy images.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split to use.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.