torch_em.data.datasets.histopathology.nuinsseg

The NuInsSeg dataset contains annotations for nucleus segmentation in H&E stained histology images for 31 organs across humans and mice.

The dataset is located at https://www.kaggle.com/datasets/ipateam/nuinsseg. This dataset is from the publication https://doi.org/10.1038/s41597-024-03117-2. Please cite it if you use this dataset for your research.

  1"""The NuInsSeg dataset contains annotations for nucleus segmentation in
  2H&E stained histology images for 31 organs across humans and mice.
  3
  4The dataset is located at https://www.kaggle.com/datasets/ipateam/nuinsseg.
  5This dataset is from the publication https://doi.org/10.1038/s41597-024-03117-2.
  6Please cite it if you use this dataset for your research.
  7"""
  8
  9import os
 10from glob import glob
 11from natsort import natsorted
 12from typing import Tuple, Union, List
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
 21def get_nuinsseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 22    """Download the NuInsSeg dataset.
 23
 24    Args:
 25        path: Filepath to a folder where the downloaded data will be saved.
 26        download: Whether to download the data if it is not present.
 27
 28    Returns:
 29        Filepath where the data is downloaded.
 30    """
 31    data_dir = os.path.join(path, "data")
 32    if os.path.exists(data_dir):
 33        return data_dir
 34
 35    os.makedirs(path, exist_ok=True)
 36
 37    util.download_source_kaggle(path=path, dataset_name="ipateam/nuinsseg", download=download)
 38    util.unzip(zip_path=os.path.join(path, "nuinsseg.zip"), dst=data_dir)
 39
 40    return data_dir
 41
 42
 43def get_nuinsseg_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
 44    """Get paths to the NuInsSeg data.
 45
 46    Args:
 47        path: Filepath to a folder where the downloaded data will be saved.
 48        download: Whether to download the data if it is not present.
 49
 50    Returns:
 51        List of filepaths for the image data.
 52        List of filepaths for the label data.
 53    """
 54    data_dir = get_nuinsseg_data(path, download)
 55
 56    tissue_type_dirs = glob(os.path.join(data_dir, "*"))
 57    raw_paths = [p for dir in tissue_type_dirs for p in natsorted(glob(os.path.join(dir, "tissue images", "*.png")))]
 58    label_paths = [
 59        p for dir in tissue_type_dirs for p in natsorted(glob(os.path.join(dir, "label masks modify", "*.tif")))
 60    ]
 61
 62    return raw_paths, label_paths
 63
 64
 65def get_nuinsseg_dataset(
 66    path: Union[os.PathLike, str],
 67    patch_shape: Tuple[int, int],
 68    resize_inputs: bool = False,
 69    download: bool = False,
 70    **kwargs
 71) -> Dataset:
 72    """Get the NuInsSeg dataset for nucleus segmentation.
 73
 74    Args:
 75        path: Filepath to a folder where the downloaded data will be saved.
 76        patch_shape: The patch shape to use for training.
 77        resize_inputs: Whether to resize the inputs.
 78        download: Whether to download the data if it is not present.
 79        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 80
 81    Returns:
 82        The segmentation dataset.
 83    """
 84    raw_paths, label_paths = get_nuinsseg_paths(path, download)
 85
 86    if resize_inputs:
 87        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
 88        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
 89            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
 90        )
 91
 92    return torch_em.default_segmentation_dataset(
 93        raw_paths=raw_paths,
 94        raw_key=None,
 95        label_paths=label_paths,
 96        label_key=None,
 97        is_seg_dataset=False,
 98        patch_shape=patch_shape,
 99        ndim=2,
100        with_channels=True,
101        **kwargs
102    )
103
104
def get_nuinsseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the NuInsSeg dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_nuinsseg_dataset(
        path=path, patch_shape=patch_shape, resize_inputs=resize_inputs, download=download, **dataset_kwargs
    )
    return torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)
def get_nuinsseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_nuinsseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make the NuInsSeg data available locally and return its location.

    Nothing is downloaded or extracted when the `data` folder already exists inside `path`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath to the `data` folder inside `path`.
    """
    target_dir = os.path.join(path, "data")

    if not os.path.exists(target_dir):
        os.makedirs(path, exist_ok=True)

        # Get the Kaggle archive into `path` first, then unpack it into the `data` folder.
        util.download_source_kaggle(path=path, dataset_name="ipateam/nuinsseg", download=download)
        archive_path = os.path.join(path, "nuinsseg.zip")
        util.unzip(zip_path=archive_path, dst=target_dir)

    return target_dir

Download the NuInsSeg dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def get_nuinsseg_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
def get_nuinsseg_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the NuInsSeg data.

    The files are gathered one tissue-type folder at a time: images from the `tissue images`
    subfolder (`*.png`) and labels from the `label masks modify` subfolder (`*.tif`).
    Folders and files are visited in natural sort order, so the result is reproducible.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_nuinsseg_data(path, download)

    # `glob` returns matches in arbitrary order; sort the tissue folders so that the
    # order of the returned files does not depend on the filesystem.
    tissue_type_dirs = natsorted(glob(os.path.join(data_dir, "*")))

    raw_paths, label_paths = [], []
    for tissue_dir in tissue_type_dirs:
        raw_paths.extend(natsorted(glob(os.path.join(tissue_dir, "tissue images", "*.png"))))
        label_paths.extend(natsorted(glob(os.path.join(tissue_dir, "label masks modify", "*.tif"))))

    return raw_paths, label_paths

Get paths to the NuInsSeg data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

A pair of lists: the filepaths for the image data and the filepaths for the label data.

def get_nuinsseg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 66def get_nuinsseg_dataset(
 67    path: Union[os.PathLike, str],
 68    patch_shape: Tuple[int, int],
 69    resize_inputs: bool = False,
 70    download: bool = False,
 71    **kwargs
 72) -> Dataset:
 73    """Get the NuInsSeg dataset for nucleus segmentation.
 74
 75    Args:
 76        path: Filepath to a folder where the downloaded data will be saved.
 77        patch_shape: The patch shape to use for training.
 78        resize_inputs: Whether to resize the inputs.
 79        download: Whether to download the data if it is not present.
 80        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 81
 82    Returns:
 83        The segmentation dataset.
 84    """
 85    raw_paths, label_paths = get_nuinsseg_paths(path, download)
 86
 87    if resize_inputs:
 88        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
 89        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
 90            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
 91        )
 92
 93    return torch_em.default_segmentation_dataset(
 94        raw_paths=raw_paths,
 95        raw_key=None,
 96        label_paths=label_paths,
 97        label_key=None,
 98        is_seg_dataset=False,
 99        patch_shape=patch_shape,
100        ndim=2,
101        with_channels=True,
102        **kwargs
103    )

Get the NuInsSeg dataset for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_nuinsseg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_nuinsseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the NuInsSeg dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_nuinsseg_dataset(
        path=path, patch_shape=patch_shape, resize_inputs=resize_inputs, download=download, **dataset_kwargs
    )
    return torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)

Get the NuInsSeg dataloader for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.