torch_em.data.datasets.light_microscopy.nis3d
The NIS3D dataset contains fluorescence microscopy volumetric images of multiple species (drosophila, zebrafish, etc) for nucleus segmentation.
The dataset is from the publication https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f2cd3d09a132757555b602e2dd43784-Abstract-Datasets_and_Benchmarks.html. The original codebase for downloading the data and related tooling is located at https://github.com/yu-lab-vt/NIS3D. The dataset is openly available at https://zenodo.org/records/11456029.
Please cite them if you use this dataset for your research.
"""The NIS3D dataset contains fluorescence microscopy volumetric images of
multiple species (drosophila, zebrafish, etc) for nucleus segmentation.

The dataset is from the publication https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f2cd3d09a132757555b602e2dd43784-Abstract-Datasets_and_Benchmarks.html
The original codebase for downloading the data and related tooling is located at https://github.com/yu-lab-vt/NIS3D.
The dataset is openly available at https://zenodo.org/records/11456029.

Please cite them if you use this dataset for your research.
"""  # noqa

import os
import shutil
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://zenodo.org/records/11456029/files/NIS3D.zip"
CHECKSUM = "3eb60b48eba87a5eeb71e9676d6df64296adc3dd93234a1db80cd9a0da28cd83"


def get_nis3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the NIS3D dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath for the downloaded data.
    """
    data_dir = os.path.join(path, "NIS3D")

    # Only download and extract when the data folder is not there yet.
    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)

        zip_path = os.path.join(path, "NIS3D.zip")
        util.download_source(zip_path, URL, download, CHECKSUM)
        util.unzip(zip_path, path)

    # NOTE: For "MusMusculus_2", the ground truth labels are named oddly. We need to fix it manually.
    # The target is built from the sample folder (instead of a string replace on the whole path),
    # so a "gt" elsewhere in the path cannot be rewritten by accident. The rename is also applied
    # when the data folder already exists, so that an interrupted earlier run is repaired.
    sample_dir = os.path.join(data_dir, "NIS3D", "MusMusculus_2")
    gt_path = os.path.join(sample_dir, "gt.tif")
    fixed_gt_path = os.path.join(sample_dir, "GroundTruth.tif")
    if os.path.exists(gt_path) and not os.path.exists(fixed_gt_path):
        shutil.move(src=gt_path, dst=fixed_gt_path)

    return data_dir


def get_nis3d_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the NIS3D data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_nis3d_data(path, download)

    # Each sample lives in its own sub-folder with one "data.tif" and one "GroundTruth.tif".
    raw_paths = natsorted(glob(os.path.join(data_dir, "NIS3D", "*", "data.tif")))
    label_paths = natsorted(glob(os.path.join(data_dir, "NIS3D", "*", "GroundTruth.tif")))

    # There must be at least one image, and as many label files as image files.
    assert len(raw_paths) and len(raw_paths) == len(label_paths)

    return raw_paths, label_paths


def get_nis3d_dataset(
    path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs,
) -> Dataset:
    """Get the NIS3D dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """

    raw_paths, label_paths = get_nis3d_paths(path, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=True,
        patch_shape=patch_shape,
        **kwargs
    )


def get_nis3d_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs,
) -> DataLoader:
    """Get the NIS3D dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Split the keyword arguments into those for the dataset and those for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_nis3d_dataset(path, patch_shape, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def get_nis3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the NIS3D dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath for the downloaded data.
    """
    data_dir = os.path.join(path, "NIS3D")

    # Only download and extract when the data folder is not there yet.
    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)

        zip_path = os.path.join(path, "NIS3D.zip")
        util.download_source(zip_path, URL, download, CHECKSUM)
        util.unzip(zip_path, path)

    # NOTE: For "MusMusculus_2", the ground truth labels are named oddly. We need to fix it manually.
    # The target is built from the sample folder (instead of a string replace on the whole path),
    # so a "gt" elsewhere in the path cannot be rewritten by accident. The rename is also applied
    # when the data folder already exists, so that an interrupted earlier run is repaired.
    sample_dir = os.path.join(data_dir, "NIS3D", "MusMusculus_2")
    gt_path = os.path.join(sample_dir, "gt.tif")
    fixed_gt_path = os.path.join(sample_dir, "GroundTruth.tif")
    if os.path.exists(gt_path) and not os.path.exists(fixed_gt_path):
        shutil.move(src=gt_path, dst=fixed_gt_path)

    return data_dir
Download the NIS3D dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath for the downloaded data.
def get_nis3d_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Collect the image and label filepaths of the NIS3D data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    sample_root = os.path.join(get_nis3d_data(path, download), "NIS3D")

    def _collect(filename):
        # One file with this name is expected in each sample sub-folder.
        return natsorted(glob(os.path.join(sample_root, "*", filename)))

    raw_paths = _collect("data.tif")
    label_paths = _collect("GroundTruth.tif")

    # There must be at least one image, and as many label files as image files.
    assert len(raw_paths) and len(raw_paths) == len(label_paths)

    return raw_paths, label_paths
Get paths to the NIS3D data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_nis3d_dataset(
    path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs,
) -> Dataset:
    """Build the NIS3D dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_nis3d_paths(path, download)

    # The keys are passed as None for both the images and the labels.
    dataset_args = dict(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        is_seg_dataset=True,
        patch_shape=patch_shape,
    )
    return torch_em.default_segmentation_dataset(**dataset_args, **kwargs)
Get the NIS3D dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_nis3d_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs,
) -> DataLoader:
    """Build the NIS3D dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader
    """
    # Split the keyword arguments into those for the dataset and those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    return torch_em.get_data_loader(
        get_nis3d_dataset(path, patch_shape, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )
Get the NIS3D dataloader for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader