torch_em.data.datasets.light_microscopy.nis3d
The NIS3D dataset contains fluorescence microscopy volumetric images of multiple species (drosophila, zebrafish, etc) for nucleus segmentation.
The dataset is from the publication https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f2cd3d09a132757555b602e2dd43784-Abstract-Datasets_and_Benchmarks.html. The original codebase for downloading the data and the accompanying tooling is located at https://github.com/yu-lab-vt/NIS3D, and the dataset is open-sourced at https://zenodo.org/records/11456029.
Please cite the publication if you use this dataset for your research.
"""The NIS3D dataset contains fluorescence microscopy volumetric images of
multiple species (drosophila, zebrafish, etc) for nucleus segmentation.

The dataset is from the publication https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f2cd3d09a132757555b602e2dd43784-Abstract-Datasets_and_Benchmarks.html
The original codebase for downloading the data and other stuff is located at https://github.com/yu-lab-vt/NIS3D.
And the dataset is open-sourced at https://zenodo.org/records/11456029.

Please cite them if you use this dataset for your research.
"""  # noqa

import os
import shutil
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, List, Literal, Optional

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://zenodo.org/records/11456029/files/NIS3D.zip"
CHECKSUM = "3eb60b48eba87a5eeb71e9676d6df64296adc3dd93234a1db80cd9a0da28cd83"


def _fix_musmusculus_label_names(data_dir: str) -> List[str]:
    """Rename the 'gt.tif' label files of "MusMusculus_2" to 'GroundTruth.tif'.

    Args:
        data_dir: Folder that is searched recursively for "MusMusculus_2/gt.tif".

    Returns:
        The list of 'gt.tif' filepaths that were found (and renamed).
    """
    gt_paths = glob(os.path.join(data_dir, "**", "MusMusculus_2", "gt.tif"), recursive=True)
    for gt_path in gt_paths:
        # Swap only the file name. A substring replacement on the whole path would also
        # rewrite any "gt" occurring in the parent folders (e.g. the user-chosen 'path').
        shutil.move(src=gt_path, dst=os.path.join(os.path.dirname(gt_path), "GroundTruth.tif"))
    return gt_paths


def get_nis3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the NIS3D dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath for the downloaded data.
    """
    data_dir = os.path.join(path, "NIS3D")
    if os.path.exists(data_dir):
        # Repairs a previous run that stopped between extraction and renaming;
        # this is a no-op when the labels are already named consistently.
        _fix_musmusculus_label_names(data_dir)
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "NIS3D.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    util.unzip(zip_path, path)

    # NOTE: For "MusMusculus_2", the ground truth labels are named oddly. We need to fix it manually.
    gt_paths = _fix_musmusculus_label_names(data_dir)
    assert gt_paths, "Such mismatching paths should exist!"

    return data_dir


def get_nis3d_paths(
    path: Union[os.PathLike, str],
    split: Optional[Literal["train", "test"]] = None,
    split_type: Optional[Literal["cross-image", "in-image"]] = None,
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the NIS3D data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The choice of data split. By default, all volumes are returned.
        split_type: The choice of the type of data split. By default, we get all the volumes as is.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_nis3d_data(path, download)

    # First, let's set the 'split_type' analogy
    if split_type is None:  # We expect original volumes as is with no splitting pattern.
        assert split is None, "Please choose a 'split_type' before making a choice on the 'split'."
        split_type = "NIS3D"
    else:
        split_type = r"suggestive splitting/" + split_type

    # Next, let's decide on the particular 'split' to be chosen.
    if split is None:
        split = "**"
    else:
        split += "/*"

    raw_paths = natsorted(glob(os.path.join(data_dir, split_type, split, "data.tif"), recursive=True))
    label_paths = natsorted(glob(os.path.join(data_dir, split_type, split, "GroundTruth.tif"), recursive=True))

    assert len(raw_paths) and len(raw_paths) == len(label_paths)

    return raw_paths, label_paths


def get_nis3d_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    split: Optional[Literal["train", "test"]] = None,
    split_type: Optional[Literal["cross-image", "in-image"]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the NIS3D dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The choice of data split. By default, all volumes are returned.
        split_type: The choice of the type of data split. By default, we get all the volumes as is.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """

    raw_paths, label_paths = get_nis3d_paths(path, split, split_type, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=True,
        patch_shape=patch_shape,
        **kwargs
    )


def get_nis3d_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Optional[Literal["train", "test"]] = None,
    split_type: Optional[Literal["cross-image", "in-image"]] = None,
    download: bool = False, **kwargs,
) -> DataLoader:
    """Get the NIS3D dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split. By default, all volumes are returned.
        split_type: The choice of the type of data split. By default, we get all the volumes as is.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_nis3d_dataset(path, patch_shape, split, split_type, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def _fix_musmusculus_label_names(data_dir: str) -> List[str]:
    """Rename the 'gt.tif' label files of "MusMusculus_2" to 'GroundTruth.tif'.

    Args:
        data_dir: Folder that is searched recursively for "MusMusculus_2/gt.tif".

    Returns:
        The list of 'gt.tif' filepaths that were found (and renamed).
    """
    gt_paths = glob(os.path.join(data_dir, "**", "MusMusculus_2", "gt.tif"), recursive=True)
    for gt_path in gt_paths:
        # Swap only the file name. A substring replacement on the whole path would also
        # rewrite any "gt" occurring in the parent folders (e.g. the user-chosen 'path').
        shutil.move(src=gt_path, dst=os.path.join(os.path.dirname(gt_path), "GroundTruth.tif"))
    return gt_paths


def get_nis3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the NIS3D dataset.

    If the "NIS3D" folder already exists under 'path', nothing is downloaded; only the
    label file naming of "MusMusculus_2" is brought in line with the other volumes.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath for the downloaded data.
    """
    data_dir = os.path.join(path, "NIS3D")
    if os.path.exists(data_dir):
        # Repairs a previous run that stopped between extraction and renaming;
        # this is a no-op when the labels are already named consistently.
        _fix_musmusculus_label_names(data_dir)
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "NIS3D.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    util.unzip(zip_path, path)

    # NOTE: For "MusMusculus_2", the ground truth labels are named oddly. We need to fix it manually.
    gt_paths = _fix_musmusculus_label_names(data_dir)
    assert gt_paths, "Such mismatching paths should exist!"

    return data_dir
Download the NIS3D dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath for the downloaded data.
def get_nis3d_paths(
    path: Union[os.PathLike, str],
    split: Optional[Literal["train", "test"]] = None,
    split_type: Optional[Literal["cross-image", "in-image"]] = None,
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Collect the image and label filepaths of the NIS3D volumes.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to select. If None, the volumes of all splits are returned.
        split_type: The kind of data split. If None, the original (unsplit) volumes are used,
            and 'split' must be None as well.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_nis3d_data(path, download)

    # Pick the subfolder to search: the original volumes or one of the suggested splittings.
    if split_type is not None:
        volume_root = r"suggestive splitting/" + split_type
    else:
        assert split is None, "Please choose a 'split_type' before making a choice on the 'split'."
        volume_root = "NIS3D"

    # Without an explicit split, search all nested folders; otherwise, the folders one level below the split.
    split_pattern = "**" if split is None else split + "/*"

    def _collect(filename):
        pattern = os.path.join(data_dir, volume_root, split_pattern, filename)
        return natsorted(glob(pattern, recursive=True))

    raw_paths = _collect("data.tif")
    label_paths = _collect("GroundTruth.tif")

    # There must be at least one volume, and each image needs a matching label file.
    assert len(raw_paths) and len(raw_paths) == len(label_paths)

    return raw_paths, label_paths
Get paths to the NIS3D data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The choice of data split. By default, all volumes are returned.
- split_type: The choice of the type of data split. By default, we get all the volumes as is.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_nis3d_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    split: Optional[Literal["train", "test"]] = None,
    split_type: Optional[Literal["cross-image", "in-image"]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the NIS3D segmentation dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to select. If None, all volumes are used.
        split_type: The kind of data split. If None, the original volumes are used as is.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, mask_paths = get_nis3d_paths(path=path, split=split, split_type=split_type, download=download)

    # The files are handed over without an internal key ('raw_key' and 'label_key' are None).
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        label_paths=mask_paths,
        raw_key=None,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )
    return dataset
Get the NIS3D dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- split: The choice of data split. By default, all volumes are returned.
- split_type: The choice of the type of data split. By default, we get all the volumes as is.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_nis3d_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Optional[Literal["train", "test"]] = None,
    split_type: Optional[Literal["cross-image", "in-image"]] = None,
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the NIS3D dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to select. If None, all volumes are used.
        split_type: The kind of data split. If None, the original volumes are used as is.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader
    """
    # Separate the keyword arguments for the dataset from the remaining ones, which go to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    nis3d_dataset = get_nis3d_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        split_type=split_type,
        download=download,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(nis3d_dataset, batch_size, **dataloader_kwargs)
Get the NIS3D dataloader for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The choice of data split. By default, all volumes are returned.
- split_type: The choice of the type of data split. By default, we get all the volumes as is.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader