torch_em.data.datasets.light_microscopy.nis3d

The NIS3D dataset contains fluorescence microscopy volumetric images of multiple species (drosophila, zebrafish, etc.) for nucleus segmentation.

The dataset is from the publication https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f2cd3d09a132757555b602e2dd43784-Abstract-Datasets_and_Benchmarks.html. The original codebase, including the code for downloading the data, is located at https://github.com/yu-lab-vt/NIS3D. The dataset is openly available at https://zenodo.org/records/11456029.

Please cite them if you use this dataset for your research.

  1"""The NIS3D dataset contains fluorescence microscopy volumetric images of
  2multiple species (drosophila, zebrafish, etc) for nucleus segmentation.
  3
  4The dataset is from the publication https://proceedings.neurips.cc/paper_files/paper/2023/hash/0f2cd3d09a132757555b602e2dd43784-Abstract-Datasets_and_Benchmarks.html
  5The original codebase, including the code for downloading the data, is located at https://github.com/yu-lab-vt/NIS3D.
  6The dataset is openly available at https://zenodo.org/records/11456029.
  7
  8Please cite them if you use this dataset for your research.
  9"""  # noqa
 10
 11import os
 12import shutil
 13from glob import glob
 14from natsort import natsorted
 15from typing import Union, Tuple, List, Literal, Optional
 16
 17from torch.utils.data import Dataset, DataLoader
 18
 19import torch_em
 20
 21from .. import util
 22
 23
 # Download link for the zipped NIS3D data hosted on Zenodo.
 24URL = "https://zenodo.org/records/11456029/files/NIS3D.zip"
 # Checksum of the zip file; handed to `util.download_source` together with the URL.
 # (64 hex digits, presumably SHA-256 - TODO confirm against `util.download_source`.)
 25CHECKSUM = "3eb60b48eba87a5eeb71e9676d6df64296adc3dd93234a1db80cd9a0da28cd83"
 26
 27
 28def get_nis3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 29    """Download the NIS3D dataset.
 30
 31    Args:
 32        path: Filepath to a folder where the downloaded data will be saved.
 33        download: Whether to download the data if it is not present.
 34
 35    Returns:
 36        The filepath for the downloaded data.
 37    """
 38    data_dir = os.path.join(path, "NIS3D")
 39    if os.path.exists(data_dir):
 40        return data_dir
 41
 42    os.makedirs(path, exist_ok=True)
 43
 44    zip_path = os.path.join(path, "NIS3D.zip")
 45    util.download_source(zip_path, URL, download, CHECKSUM)
 46    util.unzip(zip_path, path)
 47
 48    # NOTE: For "MusMusculus_2", the ground truth labels are named oddly. We need to fix it manually.
 49    gt_paths = glob(os.path.join(data_dir, "**", "MusMusculus_2", "gt.tif"), recursive=True)
 50    assert gt_paths, "Such mismatching paths should exist!"
 51    [shutil.move(src=p, dst=p.replace("gt", "GroundTruth")) for p in gt_paths]
 52
 53    return data_dir
 54
 55
 56def get_nis3d_paths(
 57    path: Union[os.PathLike, str],
 58    split: Optional[Literal["train", "test"]] = None,
 59    split_type: Optional[Literal["cross-image", "in-image"]] = None,
 60    download: bool = False,
 61) -> Tuple[List[str], List[str]]:
 62    """Get paths to the NIS3D data.
 63
 64    Args:
 65        path: Filepath to a folder where the downloaded data will be saved.
 66        split: The choice of data split. By default, all volumes are returned.
 67        split_type: The choice of the type of data split. By default, we get all the volumes as is.
 68        download: Whether to download the data if it is not present.
 69
 70    Returns:
 71        List of filepaths for the image data.
 72        List of filepaths for the label data.
 73    """
 74    data_dir = get_nis3d_data(path, download)
 75
 76    # First, let's set the 'split_type' analogy
 77    if split_type is None:  # We expect original volumes as is with no splitting pattern.
 78        assert split is None, "Please choose a 'split_type' before making a choice on the 'split'."
 79        split_type = "NIS3D"
 80    else:
 81        split_type = r"suggestive splitting/" + split_type
 82
 83    # Next, let's decide on the particular 'split' to be chosen.
 84    if split is None:
 85        split = "**"
 86    else:
 87        split += "/*"
 88
 89    raw_paths = natsorted(glob(os.path.join(data_dir, split_type, split, "data.tif"), recursive=True))
 90    label_paths = natsorted(glob(os.path.join(data_dir, split_type, split, "GroundTruth.tif"), recursive=True))
 91
 92    assert len(raw_paths) and len(raw_paths) == len(label_paths)
 93
 94    return raw_paths, label_paths
 95
 96
 97def get_nis3d_dataset(
 98    path: Union[os.PathLike, str],
 99    patch_shape: Tuple[int, ...],
100    split: Optional[Literal["train", "test"]] = None,
101    split_type: Optional[Literal["cross-image", "in-image"]] = None,
102    download: bool = False,
103    **kwargs
104) -> Dataset:
105    """Get the NIS3D dataset for nucleus segmentation.
106
107    Args:
108        path: Filepath to a folder where the downloaded data will be saved.
109        patch_shape: The patch shape to use for training.
110        split: The choice of data split. By default, all volumes are returned.
111        split_type: The choice of the type of data split. By default, we get all the volumes as is.
112        download: Whether to download the data if it is not present.
113        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
114
115    Returns:
116        The segmentation dataset.
117    """
118
119    raw_paths, label_paths = get_nis3d_paths(path, split, split_type, download)
120
121    return torch_em.default_segmentation_dataset(
122        raw_paths=raw_paths,
123        raw_key=None,
124        label_paths=label_paths,
125        label_key=None,
126        is_seg_dataset=True,
127        patch_shape=patch_shape,
128        **kwargs
129    )
130
131
def get_nis3d_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Optional[Literal["train", "test"]] = None,
    split_type: Optional[Literal["cross-image", "in-image"]] = None,
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Create the NIS3D dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split. By default, all volumes are returned.
        split_type: The choice of the type of data split. By default, we get all the volumes as is.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader
    """
    # Separate the arguments meant for the dataset from the ones meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    nis3d_dataset = get_nis3d_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        split_type=split_type,
        download=download,
        **dataset_kwargs,
    )
    loader = torch_em.get_data_loader(nis3d_dataset, batch_size, **dataloader_kwargs)
    return loader
# Download link for the zipped NIS3D data hosted on Zenodo.
URL = 'https://zenodo.org/records/11456029/files/NIS3D.zip'
# Checksum of the zip file; handed to `util.download_source` together with the URL.
# (64 hex digits, presumably SHA-256 - TODO confirm against `util.download_source`.)
CHECKSUM = '3eb60b48eba87a5eeb71e9676d6df64296adc3dd93234a1db80cd9a0da28cd83'
def get_nis3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_nis3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the NIS3D dataset.

    The data is expected in the folder "NIS3D" inside of `path`. If that folder already
    exists it is returned right away. Otherwise the zip file is fetched and unpacked via
    the `util` helpers, and the "gt.tif" label files of "MusMusculus_2" are renamed to
    "GroundTruth.tif".

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath for the downloaded data.

    Raises:
        AssertionError: If no "gt.tif" file is found for "MusMusculus_2" after unpacking.
    """
    data_dir = os.path.join(path, "NIS3D")
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "NIS3D.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    util.unzip(zip_path, path)

    # NOTE: For "MusMusculus_2", the ground truth labels are named oddly. We need to fix it manually.
    gt_paths = glob(os.path.join(data_dir, "**", "MusMusculus_2", "gt.tif"), recursive=True)
    assert gt_paths, "Such mismatching paths should exist!"
    for gt_path in gt_paths:
        # Rename only the file itself. Substituting "gt" in the full path string would also
        # rewrite any parent folder whose name happens to contain "gt" (e.g. "wavelengths").
        shutil.move(src=gt_path, dst=os.path.join(os.path.dirname(gt_path), "GroundTruth.tif"))

    return data_dir

Download the NIS3D dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath for the downloaded data.

def get_nis3d_paths( path: Union[os.PathLike, str], split: Optional[Literal['train', 'test']] = None, split_type: Optional[Literal['cross-image', 'in-image']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_nis3d_paths(
    path: Union[os.PathLike, str],
    split: Optional[Literal["train", "test"]] = None,
    split_type: Optional[Literal["cross-image", "in-image"]] = None,
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the NIS3D data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The choice of data split. By default, all volumes are returned.
            A split can only be chosen together with a 'split_type'.
        split_type: The choice of the type of data split. By default, we get all the volumes as is.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If 'split' is given without a 'split_type'.
        RuntimeError: If no image volumes are found, or if the number of image
            and label volumes differs.
    """
    # Check the arguments first, so that an invalid combination fails before any download happens.
    # An explicit exception is used because 'assert' statements are skipped when running with '-O'.
    if split_type is None and split is not None:
        raise ValueError("Please choose a 'split_type' before making a choice on the 'split'.")

    data_dir = get_nis3d_data(path, download)

    # Without a 'split_type' we expect the original volumes as is, with no splitting pattern.
    type_dir = "NIS3D" if split_type is None else "suggestive splitting/" + split_type

    # Without a 'split' all folders below 'type_dir' are searched ("**" with `recursive=True`),
    # otherwise only the folders directly inside of the chosen split folder.
    split_pattern = "**" if split is None else split + "/*"

    raw_pattern = os.path.join(data_dir, type_dir, split_pattern, "data.tif")
    label_pattern = os.path.join(data_dir, type_dir, split_pattern, "GroundTruth.tif")
    raw_paths = natsorted(glob(raw_pattern, recursive=True))
    label_paths = natsorted(glob(label_pattern, recursive=True))

    if not raw_paths:
        raise RuntimeError(f"Could not find any image volumes matching '{raw_pattern}'.")
    if len(raw_paths) != len(label_paths):
        raise RuntimeError(
            f"Found {len(raw_paths)} image volumes but {len(label_paths)} label volumes in '{data_dir}'."
        )

    return raw_paths, label_paths

Get paths to the NIS3D data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The choice of data split. By default, all volumes are returned.
  • split_type: The choice of the type of data split. By default, we get all the volumes as is.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_nis3d_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], split: Optional[Literal['train', 'test']] = None, split_type: Optional[Literal['cross-image', 'in-image']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 98def get_nis3d_dataset(
 99    path: Union[os.PathLike, str],
100    patch_shape: Tuple[int, ...],
101    split: Optional[Literal["train", "test"]] = None,
102    split_type: Optional[Literal["cross-image", "in-image"]] = None,
103    download: bool = False,
104    **kwargs
105) -> Dataset:
106    """Get the NIS3D dataset for nucleus segmentation.
107
108    Args:
109        path: Filepath to a folder where the downloaded data will be saved.
110        patch_shape: The patch shape to use for training.
111        split: The choice of data split. By default, all volumes are returned.
112        split_type: The choice of the type of data split. By default, we get all the volumes as is.
113        download: Whether to download the data if it is not present.
114        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
115
116    Returns:
117        The segmentation dataset.
118    """
119
120    raw_paths, label_paths = get_nis3d_paths(path, split, split_type, download)
121
122    return torch_em.default_segmentation_dataset(
123        raw_paths=raw_paths,
124        raw_key=None,
125        label_paths=label_paths,
126        label_key=None,
127        is_seg_dataset=True,
128        patch_shape=patch_shape,
129        **kwargs
130    )

Get the NIS3D dataset for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • split: The choice of data split. By default, all volumes are returned.
  • split_type: The choice of the type of data split. By default, we get all the volumes as is.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_nis3d_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], split: Optional[Literal['train', 'test']] = None, split_type: Optional[Literal['cross-image', 'in-image']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_nis3d_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Optional[Literal["train", "test"]] = None,
    split_type: Optional[Literal["cross-image", "in-image"]] = None,
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Create the NIS3D dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split. By default, all volumes are returned.
        split_type: The choice of the type of data split. By default, we get all the volumes as is.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader
    """
    # Separate the arguments meant for the dataset from the ones meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    nis3d_dataset = get_nis3d_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        split_type=split_type,
        download=download,
        **dataset_kwargs,
    )
    loader = torch_em.get_data_loader(nis3d_dataset, batch_size, **dataloader_kwargs)
    return loader

Get the NIS3D dataloader for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The choice of data split. By default, all volumes are returned.
  • split_type: The choice of the type of data split. By default, we get all the volumes as is.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader