torch_em.data.datasets.light_microscopy.nisnet3d

The NISNet3D dataset contains 3D fluorescence microscopy images of nuclei with manually annotated instance segmentation, curated for training and evaluating 3D nuclear instance segmentation methods.

The dataset contains annotated subvolumes from eight microscopy volumes:

  • BABB-cleared_kidney_1: BABB-cleared rat kidney (confocal, DAPI)
  • Cleared_mouse_intestine_1: Cleared mouse intestine (confocal, Hoechst)
  • Diabetic_Biopsy_Human_Spectral_1: Diabetic biopsy human spectral (5 subvolumes)
  • Diabetic_Biopsy_Human_Spectral_3: Diabetic biopsy human spectral (6 subvolumes)
  • Kidney_Cortex_Human_Spectral_1: Kidney cortex human spectral (6 subvolumes)
  • Kidney_Human_Nephrectomy_1: Kidney human nephrectomy (4 subvolumes)
  • Rat_liver_1: Shallow rat liver (confocal, Hoechst) — entire volume annotated
  • Scale-cleared_rat_kidney_1: Scale-cleared rat kidney (confocal, DAPI)

NOTE: The original paper also includes a V5 volume (zebrafish brain EM) sourced from the NucMM dataset, which is already available in torch-em under torch_em.data.datasets.electron_microscopy.nuc_mm. It is therefore excluded here to avoid duplication.

The dataset is located at https://zenodo.org/records/7065147. This dataset is from the publication https://doi.org/10.1038/s41598-023-36243-9. Please cite it if you use this dataset in your research.

  1"""The NISNet3D dataset contains 3D fluorescence microscopy images of nuclei
  2with manually annotated instance segmentation, curated for training and
  3evaluating 3D nuclear instance segmentation methods.
  4
  5The dataset contains annotated subvolumes from eight microscopy volumes:
  6- BABB-cleared_kidney_1: BABB-cleared rat kidney (confocal, DAPI)
  7- Cleared_mouse_intestine_1: Cleared mouse intestine (confocal, Hoechst)
  8- Diabetic_Biopsy_Human_Spectral_1: Diabetic biopsy human spectral (5 subvolumes)
  9- Diabetic_Biopsy_Human_Spectral_3: Diabetic biopsy human spectral (6 subvolumes)
 10- Kidney_Cortex_Human_Spectral_1: Kidney cortex human spectral (6 subvolumes)
 11- Kidney_Human_Nephrectomy_1: Kidney human nephrectomy (4 subvolumes)
 12- Rat_liver_1: Shallow rat liver (confocal, Hoechst) — entire volume annotated
 13- Scale-cleared_rat_kidney_1: Scale-cleared rat kidney (confocal, DAPI)
 14
 15NOTE: The original paper also includes a V5 volume (zebrafish brain EM) sourced
 16from the NucMM dataset, which is already available in torch-em under
 17`torch_em.data.datasets.electron_microscopy.nuc_mm`. It is therefore excluded
 18here to avoid duplication.
 19
 20The dataset is located at https://zenodo.org/records/7065147.
 21This dataset is from the publication https://doi.org/10.1038/s41598-023-36243-9.
 22Please cite it if you use this dataset in your research.
 23"""
 24
 25import os
 26from glob import glob
 27from natsort import natsorted
 28from typing import List, Literal, Optional, Tuple, Union
 29
 30from torch.utils.data import Dataset, DataLoader
 31
 32import torch_em
 33
 34from .. import util
 35
 36
 # Zenodo link to the zip archive with the ground truth and synthetic data.
 37URL = "https://zenodo.org/records/7065147/files/ground_truth_and_synthetic.zip"
 # Checksum handed to `util.download_source` together with URL
 # (64 hex digits, presumably SHA-256 -- TODO confirm).
 38CHECKSUM = "02f8ad4a6e489283548ea4f0c2c39ac975531c09b58e4d6f498b4e49ac73f0d3"
 39
 # Names of the supported volumes. `get_nisnet3d_paths` uses this list as the
 # default selection, validates user input against it, and joins each name
 # onto the extracted data directory to locate the volume folder.
 40VOLUMES = [
 41    "BABB-cleared_kidney_1",
 42    "Cleared_mouse_intestine_1",
 43    "Diabetic_Biopsy_Human_Spectral_1",
 44    "Diabetic_Biopsy_Human_Spectral_3",
 45    "Kidney_Cortex_Human_Spectral_1",
 46    "Kidney_Human_Nephrectomy_1",
 47    "Rat_liver_1",
 48    "Scale-cleared_rat_kidney_1",
 49]
 50
 51
 def get_nisnet3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
     """Make sure the NISNet3D data is available locally.

     If the folder `ground_truth_and_synthetic` already exists inside `path`,
     nothing is downloaded and that folder is returned right away.

     Args:
         path: Filepath to a folder where the downloaded data will be saved.
         download: Whether to download the data if it is not present.

     Returns:
         The filepath to the extracted data directory.
     """
     extracted_dir = os.path.join(path, "ground_truth_and_synthetic")

     # Only fetch and unpack the archive when the extracted folder is missing.
     if not os.path.exists(extracted_dir):
         os.makedirs(path, exist_ok=True)
         archive = os.path.join(path, "ground_truth_and_synthetic.zip")
         util.download_source(archive, URL, download, checksum=CHECKSUM)
         util.unzip(archive, path)

     return extracted_dir
 72
 73
 def get_nisnet3d_paths(
     path: Union[os.PathLike, str],
     volumes: Optional[List[Literal[
         "BABB-cleared_kidney_1",
         "Cleared_mouse_intestine_1",
         "Diabetic_Biopsy_Human_Spectral_1",
         "Diabetic_Biopsy_Human_Spectral_3",
         "Kidney_Cortex_Human_Spectral_1",
         "Kidney_Human_Nephrectomy_1",
         "Rat_liver_1",
         "Scale-cleared_rat_kidney_1",
     ]]] = None,
     download: bool = False,
 ) -> Tuple[List[str], List[str]]:
     """Get paths to the NISNet3D data.

     Args:
         path: Filepath to a folder where the downloaded data will be saved.
         volumes: The volume(s) to use. Defaults to all 8 volumes.
             A single volume name given as a plain string is accepted as well.
         download: Whether to download the data if it is not present.

     Returns:
         List of filepaths for the image data.
         List of filepaths for the label data.

     Raises:
         ValueError: If a requested volume is not one of `VOLUMES`.
         RuntimeError: If the folder of a requested volume does not exist, or
             if no image / label pair was found at all.
     """
     if volumes is None:
         volumes = VOLUMES
     else:
         # A bare string would otherwise be iterated character by character
         # and rejected with a misleading "Invalid volumes" message.
         if isinstance(volumes, str):
             volumes = [volumes]
         invalid = [v for v in volumes if v not in VOLUMES]
         if invalid:
             raise ValueError(f"Invalid volumes: {invalid}. Valid choices are {VOLUMES}.")

     data_dir = get_nisnet3d_data(path, download)

     raw_paths, label_paths = [], []
     for vol in volumes:
         vol_dir = os.path.join(data_dir, vol)
         if not os.path.exists(vol_dir):
             raise RuntimeError(
                 f"Volume directory not found: {vol_dir}. "
                 "Please check the dataset structure after downloading."
             )
         # Each subvolume folder contains {name}.tif (raw) and {name}_gt.tif (label).
         # The 'synthetic' subfolder is skipped.
         for sub_dir in natsorted(glob(os.path.join(vol_dir, "*"))):
             sub_name = os.path.basename(sub_dir)
             if not os.path.isdir(sub_dir) or sub_name == "synthetic":
                 continue
             raw_file = os.path.join(sub_dir, f"{sub_name}.tif")
             label_file = os.path.join(sub_dir, f"{sub_name}_gt.tif")
             # Folders missing either file are left out so that the two
             # returned lists always stay aligned.
             if os.path.exists(raw_file) and os.path.exists(label_file):
                 raw_paths.append(raw_file)
                 label_paths.append(label_file)

     if not raw_paths:
         raise RuntimeError(
             f"No image files found under {data_dir}. "
             "Please check the dataset structure."
         )

     return raw_paths, label_paths
135
136
def get_nisnet3d_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    volumes: Optional[List[str]] = None,
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Build the NISNet3D dataset for 3D nuclear instance segmentation.

    The image and label files are collected with `get_nisnet3d_paths` and
    handed to `torch_em.default_segmentation_dataset` with `raw_key` and
    `label_key` set to None.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        volumes: The volume(s) to use. Defaults to all 8 volumes.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_files, annotation_files = get_nisnet3d_paths(path=path, volumes=volumes, download=download)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_files,
        raw_key=None,
        label_paths=annotation_files,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs,
    )
    return dataset
166
167
def get_nisnet3d_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    volumes: Optional[List[str]] = None,
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the NISNet3D dataloader for 3D nuclear instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        volumes: The volume(s) to use. Defaults to all 8 volumes.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # `util.split_kwargs` yields two dicts: the first is forwarded to the
    # dataset constructor, the second to `torch_em.get_data_loader`.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    nisnet3d_dataset = get_nisnet3d_dataset(
        path=path, patch_shape=patch_shape, volumes=volumes, download=download, **dataset_kwargs
    )
    return torch_em.get_data_loader(nisnet3d_dataset, batch_size, **dataloader_kwargs)
# Zenodo link to the zip archive with the ground truth and synthetic data.
URL = 'https://zenodo.org/records/7065147/files/ground_truth_and_synthetic.zip'
# Checksum handed to `util.download_source` together with URL
# (64 hex digits, presumably SHA-256 -- TODO confirm).
CHECKSUM = '02f8ad4a6e489283548ea4f0c2c39ac975531c09b58e4d6f498b4e49ac73f0d3'
# Names of the supported volumes; used by `get_nisnet3d_paths` as the default
# selection and to validate user-supplied volume names.
VOLUMES = ['BABB-cleared_kidney_1', 'Cleared_mouse_intestine_1', 'Diabetic_Biopsy_Human_Spectral_1', 'Diabetic_Biopsy_Human_Spectral_3', 'Kidney_Cortex_Human_Spectral_1', 'Kidney_Human_Nephrectomy_1', 'Rat_liver_1', 'Scale-cleared_rat_kidney_1']
def get_nisnet3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_nisnet3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the NISNet3D data is available locally.

    If the folder `ground_truth_and_synthetic` already exists inside `path`,
    nothing is downloaded and that folder is returned right away.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the extracted data directory.
    """
    extracted_dir = os.path.join(path, "ground_truth_and_synthetic")

    # Only fetch and unpack the archive when the extracted folder is missing.
    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "ground_truth_and_synthetic.zip")
        util.download_source(archive, URL, download, checksum=CHECKSUM)
        util.unzip(archive, path)

    return extracted_dir

Download the NISNet3D dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the extracted data directory.

def get_nisnet3d_paths( path: Union[os.PathLike, str], volumes: Optional[List[Literal['BABB-cleared_kidney_1', 'Cleared_mouse_intestine_1', 'Diabetic_Biopsy_Human_Spectral_1', 'Diabetic_Biopsy_Human_Spectral_3', 'Kidney_Cortex_Human_Spectral_1', 'Kidney_Human_Nephrectomy_1', 'Rat_liver_1', 'Scale-cleared_rat_kidney_1']]] = None, download: bool = False) -> Tuple[List[str], List[str]]:
 def get_nisnet3d_paths(
     path: Union[os.PathLike, str],
     volumes: Optional[List[Literal[
         "BABB-cleared_kidney_1",
         "Cleared_mouse_intestine_1",
         "Diabetic_Biopsy_Human_Spectral_1",
         "Diabetic_Biopsy_Human_Spectral_3",
         "Kidney_Cortex_Human_Spectral_1",
         "Kidney_Human_Nephrectomy_1",
         "Rat_liver_1",
         "Scale-cleared_rat_kidney_1",
     ]]] = None,
     download: bool = False,
 ) -> Tuple[List[str], List[str]]:
     """Get paths to the NISNet3D data.

     Args:
         path: Filepath to a folder where the downloaded data will be saved.
         volumes: The volume(s) to use. Defaults to all 8 volumes.
             A single volume name given as a plain string is accepted as well.
         download: Whether to download the data if it is not present.

     Returns:
         List of filepaths for the image data.
         List of filepaths for the label data.

     Raises:
         ValueError: If a requested volume is not one of `VOLUMES`.
         RuntimeError: If the folder of a requested volume does not exist, or
             if no image / label pair was found at all.
     """
     if volumes is None:
         volumes = VOLUMES
     else:
         # A bare string would otherwise be iterated character by character
         # and rejected with a misleading "Invalid volumes" message.
         if isinstance(volumes, str):
             volumes = [volumes]
         invalid = [v for v in volumes if v not in VOLUMES]
         if invalid:
             raise ValueError(f"Invalid volumes: {invalid}. Valid choices are {VOLUMES}.")

     data_dir = get_nisnet3d_data(path, download)

     raw_paths, label_paths = [], []
     for vol in volumes:
         vol_dir = os.path.join(data_dir, vol)
         if not os.path.exists(vol_dir):
             raise RuntimeError(
                 f"Volume directory not found: {vol_dir}. "
                 "Please check the dataset structure after downloading."
             )
         # Each subvolume folder contains {name}.tif (raw) and {name}_gt.tif (label).
         # The 'synthetic' subfolder is skipped.
         for sub_dir in natsorted(glob(os.path.join(vol_dir, "*"))):
             sub_name = os.path.basename(sub_dir)
             if not os.path.isdir(sub_dir) or sub_name == "synthetic":
                 continue
             raw_file = os.path.join(sub_dir, f"{sub_name}.tif")
             label_file = os.path.join(sub_dir, f"{sub_name}_gt.tif")
             # Folders missing either file are left out so that the two
             # returned lists always stay aligned.
             if os.path.exists(raw_file) and os.path.exists(label_file):
                 raw_paths.append(raw_file)
                 label_paths.append(label_file)

     if not raw_paths:
         raise RuntimeError(
             f"No image files found under {data_dir}. "
             "Please check the dataset structure."
         )

     return raw_paths, label_paths

Get paths to the NISNet3D data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • volumes: The volume(s) to use. Defaults to all 8 volumes.
  • download: Whether to download the data if it is not present.
Returns:

Two lists: the filepaths for the image data, and the filepaths for the label data.

def get_nisnet3d_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], volumes: Optional[List[str]] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_nisnet3d_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    volumes: Optional[List[str]] = None,
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Build the NISNet3D dataset for 3D nuclear instance segmentation.

    The image and label files are collected with `get_nisnet3d_paths` and
    handed to `torch_em.default_segmentation_dataset` with `raw_key` and
    `label_key` set to None.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        volumes: The volume(s) to use. Defaults to all 8 volumes.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_files, annotation_files = get_nisnet3d_paths(path=path, volumes=volumes, download=download)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_files,
        raw_key=None,
        label_paths=annotation_files,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs,
    )
    return dataset

Get the NISNet3D dataset for 3D nuclear instance segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • volumes: The volume(s) to use. Defaults to all 8 volumes.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_nisnet3d_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], volumes: Optional[List[str]] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_nisnet3d_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    volumes: Optional[List[str]] = None,
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the NISNet3D dataloader for 3D nuclear instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        volumes: The volume(s) to use. Defaults to all 8 volumes.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # `util.split_kwargs` yields two dicts: the first is forwarded to the
    # dataset constructor, the second to `torch_em.get_data_loader`.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    nisnet3d_dataset = get_nisnet3d_dataset(
        path=path, patch_shape=patch_shape, volumes=volumes, download=download, **dataset_kwargs
    )
    return torch_em.get_data_loader(nisnet3d_dataset, batch_size, **dataloader_kwargs)

Get the NISNet3D dataloader for 3D nuclear instance segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • volumes: The volume(s) to use. Defaults to all 8 volumes.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.