torch_em.data.datasets.light_microscopy.nis3d

The NIS3D dataset contains fluorescence microscopy volumetric images of multiple species (drosophila, zebrafish, etc) for nucleus segmentation.

The dataset is from the publication https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f2cd3d09a132757555b602e2dd43784-Abstract-Datasets_and_Benchmarks.html. The original codebase accompanying the dataset (including the code for downloading the data) is located at https://github.com/yu-lab-vt/NIS3D, and the dataset itself is openly available at https://zenodo.org/records/11456029.

Please cite them if you use this dataset for your research.

  1"""The NIS3D dataset contains fluorescence microscopy volumetric images of
  2multiple species (drosophila, zebrafish, etc) for nucleus segmentation.
  3
  4The dataset is from the publication https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f2cd3d09a132757555b602e2dd43784-Abstract-Datasets_and_Benchmarks.html
  5The original codebase accompanying the dataset (including the code for downloading the data) is located at https://github.com/yu-lab-vt/NIS3D.
  6The dataset itself is openly available at https://zenodo.org/records/11456029.
  7
  8Please cite them if you use this dataset for your research.
  9"""  # noqa
 10
 11import os
 12import shutil
 13from glob import glob
 14from natsort import natsorted
 15from typing import Union, Tuple, List
 16
 17from torch.utils.data import Dataset, DataLoader
 18
 19import torch_em
 20
 21from .. import util
 22
 23
# Direct download link for the zipped NIS3D archive hosted on Zenodo.
URL = "https://zenodo.org/records/11456029/files/NIS3D.zip"
# Expected checksum of the archive; it is handed to `util.download_source` together with `URL`.
# NOTE(review): 64 hex characters, presumably a SHA-256 digest - confirm against `util.download_source`.
CHECKSUM = "3eb60b48eba87a5eeb71e9676d6df64296adc3dd93234a1db80cd9a0da28cd83"
 26
 27
 28def get_nis3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 29    """Download the NIS3D dataset.
 30
 31    Args:
 32        path: Filepath to a folder where the downloaded data will be saved.
 33        download: Whether to download the data if it is not present.
 34
 35    Returns:
 36        The filepath for the downloaded data.
 37    """
 38    data_dir = os.path.join(path, "NIS3D")
 39    if os.path.exists(data_dir):
 40        return data_dir
 41
 42    os.makedirs(path, exist_ok=True)
 43
 44    zip_path = os.path.join(path, "NIS3D.zip")
 45    util.download_source(zip_path, URL, download, CHECKSUM)
 46    util.unzip(zip_path, path)
 47
 48    # NOTE: For "MusMusculus_2", the ground truth labels are named oddly. We need to fix it manually.
 49    gt_path = os.path.join(data_dir, "NIS3D", "MusMusculus_2", "gt.tif")
 50    shutil.move(src=gt_path, dst=gt_path.replace("gt", "GroundTruth"))
 51
 52    return data_dir
 53
 54
 55def get_nis3d_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
 56    """Get paths to the NIS3D data.
 57
 58    Args:
 59        path: Filepath to a folder where the downloaded data will be saved.
 60        download: Whether to download the data if it is not present.
 61
 62    Returns:
 63        List of filepaths for the image data.
 64        List of filepaths for the label data.
 65    """
 66    data_dir = get_nis3d_data(path, download)
 67
 68    raw_paths = natsorted(glob(os.path.join(data_dir, "NIS3D", "*", "data.tif")))
 69    label_paths = natsorted(glob(os.path.join(data_dir, "NIS3D", "*", "GroundTruth.tif")))
 70
 71    assert len(raw_paths) and len(raw_paths) == len(label_paths)
 72
 73    return raw_paths, label_paths
 74
 75
 76def get_nis3d_dataset(
 77    path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs,
 78) -> Dataset:
 79    """Get the NIS3D dataset for nucleus segmentation.
 80
 81    Args:
 82        path: Filepath to a folder where the downloaded data will be saved.
 83        patch_shape: The patch shape to use for training.
 84        download: Whether to download the data if it is not present.
 85        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 86
 87    Returns:
 88        The segmentation dataset.
 89    """
 90
 91    raw_paths, label_paths = get_nis3d_paths(path, download)
 92
 93    return torch_em.default_segmentation_dataset(
 94        raw_paths=raw_paths,
 95        raw_key=None,
 96        label_paths=label_paths,
 97        label_key=None,
 98        is_seg_dataset=True,
 99        patch_shape=patch_shape,
100        **kwargs
101    )
102
103
def get_nis3d_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs,
) -> DataLoader:
    """Build the NIS3D dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the ones that are forwarded to the loader.
    split = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds_kwargs, loader_kwargs = split

    return torch_em.get_data_loader(
        get_nis3d_dataset(path, patch_shape, download, **ds_kwargs), batch_size, **loader_kwargs
    )
URL = 'https://zenodo.org/records/11456029/files/NIS3D.zip'
CHECKSUM = '3eb60b48eba87a5eeb71e9676d6df64296adc3dd93234a1db80cd9a0da28cd83'
def get_nis3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_nis3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the NIS3D dataset.

    The archive is only downloaded and unpacked if the folder `<path>/NIS3D` does not exist yet.
    Afterwards the ground truth of the "MusMusculus_2" volume is renamed from "gt.tif" to
    "GroundTruth.tif" so that it follows the naming expected for the label files. The rename is
    skipped if "gt.tif" is not present, so calling this function repeatedly is safe and it also
    repairs a data folder where the rename did not happen yet.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath for the downloaded data.
    """
    data_dir = os.path.join(path, "NIS3D")

    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)

        zip_path = os.path.join(path, "NIS3D.zip")
        util.download_source(zip_path, URL, download, CHECKSUM)
        util.unzip(zip_path, path)

    # NOTE: For "MusMusculus_2", the ground truth labels are named oddly. We need to fix it manually.
    # The target is built from the sample folder instead of string-replacing "gt" in the full path,
    # because the latter would also rewrite any "gt" that occurs in the parent directories.
    sample_dir = os.path.join(data_dir, "NIS3D", "MusMusculus_2")
    gt_path = os.path.join(sample_dir, "gt.tif")
    if os.path.exists(gt_path):
        shutil.move(src=gt_path, dst=os.path.join(sample_dir, "GroundTruth.tif"))

    return data_dir

Download the NIS3D dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath for the downloaded data.

def get_nis3d_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
def get_nis3d_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Collect the image and label files of the NIS3D data.

    The files are searched one folder level below `<data_dir>/NIS3D`: "data.tif" for the images
    and "GroundTruth.tif" for the labels. Both lists are returned in natural sort order.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    volume_root = os.path.join(get_nis3d_data(path, download), "NIS3D")

    def _find(filename):
        # One match per sub-folder that contains a file with the given name.
        return natsorted(glob(os.path.join(volume_root, "*", filename)))

    raw_paths = _find("data.tif")
    label_paths = _find("GroundTruth.tif")

    # There must be at least one image, and exactly as many label files as images.
    n_raw = len(raw_paths)
    assert n_raw and n_raw == len(label_paths)

    return raw_paths, label_paths

Get paths to the NIS3D data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_nis3d_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 77def get_nis3d_dataset(
 78    path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs,
 79) -> Dataset:
 80    """Get the NIS3D dataset for nucleus segmentation.
 81
 82    Args:
 83        path: Filepath to a folder where the downloaded data will be saved.
 84        patch_shape: The patch shape to use for training.
 85        download: Whether to download the data if it is not present.
 86        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 87
 88    Returns:
 89        The segmentation dataset.
 90    """
 91
 92    raw_paths, label_paths = get_nis3d_paths(path, download)
 93
 94    return torch_em.default_segmentation_dataset(
 95        raw_paths=raw_paths,
 96        raw_key=None,
 97        label_paths=label_paths,
 98        label_key=None,
 99        is_seg_dataset=True,
100        patch_shape=patch_shape,
101        **kwargs
102    )

Get the NIS3D dataset for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_nis3d_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_nis3d_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs,
) -> DataLoader:
    """Build the NIS3D dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the ones that are forwarded to the loader.
    split = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds_kwargs, loader_kwargs = split

    return torch_em.get_data_loader(
        get_nis3d_dataset(path, patch_shape, download, **ds_kwargs), batch_size, **loader_kwargs
    )

Get the NIS3D dataloader for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader for the NIS3D dataset.