torch_em.data.datasets.light_microscopy.celegans_atlas
CElegans Atlas is a dataset that contains nucleus annotations in 3d confocal microscopy images.
The preprocessed dataset is located at https://zenodo.org/records/5942575. The raw images are from the publication https://doi.org/10.1038/nmeth.1366. The nucleus annotation masks were generated in the publication https://arxiv.org/abs/2002.02857. The available data splits were created in the publication https://arxiv.org/abs/1908.03636.
Please cite them all if you use this dataset for your research.
1"""CElegans Atlas is a dataset that contains nucleus annotations in 3d confocal microscopy images. 2 3The preprocessed dataset is located at https://zenodo.org/records/5942575. 4The raw images are from the publication https://doi.org/10.1038/nmeth.1366. 5The nucleus annotation masks were generated in the publication https://arxiv.org/abs/2002.02857. 6And the available data splits were made by the following publication https://arxiv.org/abs/1908.03636. 7 8Please cite them all if you use this dataset for your research. 9""" 10 11import os 12import shutil 13from glob import glob 14from natsort import natsorted 15from typing import Union, Tuple, List, Literal 16 17from torch.utils.data import Dataset, DataLoader 18 19import torch_em 20 21from .. import util 22 23 24URL = "https://zenodo.org/records/5942575/files/c_elegans_nuclei.zip" 25CHECKSUM = "1def07491cdad89e381cbe4437ef03da3af8f78d127e8152cd9b32bdab152c4e" 26 27 28def get_celegans_atlas_data(path: Union[os.PathLike, str], download: bool = False) -> str: 29 """Download the CElegans Atlas dataset. 30 31 Args: 32 path: Filepath to a folder where the downloaded data will be saved. 33 download: Whether to download the data if it is not present. 34 35 Returns: 36 Filepath where the dataset is stored. 37 """ 38 data_dir = os.path.join(path, "c_elegans_nuclei") 39 if os.path.exists(data_dir): 40 return data_dir 41 42 os.makedirs(path, exist_ok=True) 43 44 # Download and unzip the images. 45 zip_path = os.path.join(path, "c_elegans_nuclei.zip") 46 util.download_source(zip_path, url=URL, checksum=CHECKSUM, download=download) 47 util.unzip(zip_path, path) 48 49 # Remove other miscellanous folders. 50 shutil.rmtree(os.path.join(path, "__MACOSX")) 51 52 return data_dir 53 54 55def get_celegans_atlas_paths( 56 path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool = False, 57) -> Tuple[List[str], List[str]]: 58 """Get paths to the CElegans Atlas data. 
59 60 Args: 61 path: Filepath to a folder where the downloaded data will be saved. 62 split: The data split to use. Either 'train', 'val' or 'test'. 63 download: Whether to download the data if it is not present. 64 65 Returns: 66 List of filepaths for the image data. 67 List of filepaths for the label data. 68 """ 69 if split not in ["train", "val", "test"]: 70 raise ValueError(f"'{split}' is not a valid data split choice.") 71 72 data_path = get_celegans_atlas_data(path, download) 73 74 raw_paths = natsorted(glob(os.path.join(data_path, split, "images", "*.tif"))) 75 label_paths = natsorted(glob(os.path.join(data_path, split, "masks", "*.tif"))) 76 77 return raw_paths, label_paths 78 79 80def get_celegans_atlas_dataset( 81 path: Union[os.PathLike, str], 82 patch_shape: Tuple[int, ...], 83 split: Literal["train", "val", "test"], 84 download: bool = False, 85 **kwargs, 86) -> Dataset: 87 """Get the CElegans Atlas dataset for nucleus segmentation. 88 89 Args: 90 path: Filepath to a folder where the downloaded data will be saved. 91 patch_shape: The patch shape to use for training. 92 split: The data split to use. Either 'train', 'val' or 'test'. 93 download: Whether to download the data if it is not present. 94 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 95 96 Returns: 97 The segmentation dataset. 98 """ 99 raw_paths, label_paths = get_celegans_atlas_paths(path, split, download) 100 101 return torch_em.default_segmentation_dataset( 102 raw_paths=raw_paths, 103 raw_key=None, 104 label_paths=label_paths, 105 label_key=None, 106 patch_shape=patch_shape, 107 **kwargs, 108 ) 109 110 111def get_celegans_atlas_loader( 112 path: Union[os.PathLike, str], 113 batch_size: int, 114 patch_shape: Tuple[int, ...], 115 split: Literal["train", "val", "test"], 116 download: bool = False, 117 **kwargs, 118) -> DataLoader: 119 """Get the CElegans Atlas dataloader for nucleus segmentation. 
120 121 Args: 122 path: Filepath to a folder where the downloaded data will be saved. 123 batch_size: The batch size for training. 124 patch_shape: The patch shape to use for training. 125 split: The data split to use. Either 'train', 'val' or 'test'. 126 download: Whether to download the data if it is not present. 127 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 128 129 Returns: 130 The DataLoader. 131 """ 132 ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) 133 dataset = get_celegans_atlas_dataset(path, patch_shape, split, download, **ds_kwargs) 134 return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def get_celegans_atlas_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the CElegans Atlas dataset.

    If the extracted `c_elegans_nuclei` folder already exists under `path`,
    its location is returned immediately and nothing is downloaded.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the dataset is stored.
    """
    data_dir = os.path.join(path, "c_elegans_nuclei")
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    # Download and unzip the images.
    zip_path = os.path.join(path, "c_elegans_nuclei.zip")
    util.download_source(zip_path, url=URL, checksum=CHECKSUM, download=download)
    util.unzip(zip_path, path)

    # Remove the miscellaneous `__MACOSX` folder extracted alongside the data.
    # The cleanup is best-effort: an archive without this folder must not turn
    # an otherwise successful extraction into a failure.
    macosx_dir = os.path.join(path, "__MACOSX")
    if os.path.isdir(macosx_dir):
        shutil.rmtree(macosx_dir)

    return data_dir
Download the CElegans Atlas dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the dataset is stored.
def get_celegans_atlas_paths(
    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Collect the image and label filepaths of one CElegans Atlas data split.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of 'train', 'val' or 'test'.
    """
    valid_splits = ("train", "val", "test")
    if split not in valid_splits:
        raise ValueError(f"'{split}' is not a valid data split choice.")

    # Each split folder holds an "images" and a "masks" subfolder with tif files.
    split_dir = os.path.join(get_celegans_atlas_data(path, download), split)
    image_pattern = os.path.join(split_dir, "images", "*.tif")
    mask_pattern = os.path.join(split_dir, "masks", "*.tif")

    # Natural sorting is applied to both lists so they follow the same file order.
    image_files = natsorted(glob(image_pattern))
    mask_files = natsorted(glob(mask_pattern))
    return image_files, mask_files
Get paths to the CElegans Atlas data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split to use. Either 'train', 'val' or 'test'.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_celegans_atlas_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    split: Literal["train", "val", "test"],
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Build the CElegans Atlas segmentation dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_files, mask_files = get_celegans_atlas_paths(path=path, split=split, download=download)

    # The keys are None because the data is read from tif files, not from a container format.
    fixed_arguments = dict(
        raw_paths=image_files,
        raw_key=None,
        label_paths=mask_files,
        label_key=None,
        patch_shape=patch_shape,
    )
    return torch_em.default_segmentation_dataset(**fixed_arguments, **kwargs)
Get the CElegans Atlas dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train', 'val' or 'test'.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_celegans_atlas_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal["train", "val", "test"],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the CElegans Atlas dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    segmentation_dataset = get_celegans_atlas_dataset(
        path=path, patch_shape=patch_shape, split=split, download=download, **dataset_kwargs
    )
    return torch_em.get_data_loader(segmentation_dataset, batch_size, **dataloader_kwargs)
Get the CElegans Atlas dataloader for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train', 'val' or 'test'.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.