torch_em.data.datasets.histopathology.nuclick

The NuClick dataset contains annotations for lymphocytes in IHC histopathology images.

This dataset is located at https://warwick.ac.uk/fac/cross_fac/tia/data/nuclick/. The dataset is from the publication http://www.sciencedirect.com/science/article/pii/S1361841520301353. Please cite it if you use this dataset for your research.

  1"""The NuClick dataset contains annotations for lymphocytes in IHC histopathology images.
  2
  3This dataset is located at https://warwick.ac.uk/fac/cross_fac/tia/data/nuclick/.
  4The dataset is from the publication http://www.sciencedirect.com/science/article/pii/S1361841520301353.
  5Please cite it if you use this dataset for your research.
  6"""
  7
  8import os
  9from glob import glob
 10from tqdm import tqdm
 11from pathlib import Path
 12from natsort import natsorted
 13from typing import Tuple, List, Literal, Union
 14
 15import numpy as np
 16import imageio.v3 as imageio
 17
 18from torch.utils.data import Dataset, DataLoader
 19
 20import torch_em
 21
 22from .. import util
 23
 24
 # Location of the zipped NuClick IHC data; passed as `url` to `util.download_source`.
 25URL = "https://warwick.ac.uk/fac/cross_fac/tia/data/nuclick/ihc_nuclick.zip"
 # Expected checksum of the zip file; passed as `checksum` to `util.download_source`.
 # NOTE(review): 64 hex characters, presumably a SHA256 digest - confirm against `util`.
 26CHECKSUM = "5128f1dfcba531e89b49e26364bc667eeb9978fa0039baa25a7f73fdaec2d736"
 27
 28
 29def get_nuclick_data(path: Union[os.PathLike, str], download: bool = False):
 30    """Download the NuClick dataset.
 31
 32    Args:
 33        path: Filepath to a folder where the downloaded data will be saved.
 34        donwload: Whether to download the data if it is not present.
 35
 36    Returns:
 37        Filepath where the dataset is downloaded.
 38    """
 39    data_dir = os.path.join(path, "IHC_nuclick", "IHC")
 40    if os.path.exists(data_dir):
 41        return data_dir
 42
 43    os.makedirs(path, exist_ok=True)
 44
 45    zip_path = os.path.join(path, "ihc_nuclick.zip")
 46    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 47    util.unzip(zip_path=zip_path, dst=path)
 48
 49    return data_dir
 50
 51
 52def get_nuclick_paths(
 53    path: Union[os.PathLike, str], split: Literal["Train", "Validation"], download: bool = False,
 54) -> Tuple[List[str], List[str]]:
 55    """Get paths to the NuClick data.
 56
 57    Args:
 58        path: Filepath to a folder where the downloaded data will be saved.
 59        split: The split to use for the dataset. Either 'Train' or 'Validation'.
 60        donwload: Whether to download the data if it is not present.
 61
 62    Returns:
 63        List of filepaths for the image data.
 64        List of filepaths for the label data.
 65    """
 66    data_dir = get_nuclick_data(path, download)
 67
 68    raw_paths = natsorted(glob(os.path.join(data_dir, "images", split, "*.png")))
 69    label_paths = natsorted(glob(os.path.join(data_dir, "masks", split, "*.npy")))
 70
 71    neu_label_paths = []
 72    for lpath in tqdm(label_paths):
 73        neu_lpath = Path(lpath).with_suffix(".tif")
 74        neu_label_paths.append(str(neu_lpath))
 75        if os.path.exists(neu_lpath):
 76            continue
 77
 78        imageio.imwrite(neu_lpath, np.load(lpath), compression="zlib")
 79
 80    return raw_paths, neu_label_paths
 81
 82
 83def get_nuclick_dataset(
 84    path: Union[os.PathLike, str],
 85    patch_shape: Tuple[int, int],
 86    split: Literal["Train", "Validation"],
 87    resize_inputs: bool = False,
 88    download: bool = False,
 89    **kwargs
 90) -> Dataset:
 91    """Get the NuClick dataset for lymphocyte segmentation.
 92
 93    Args:
 94        path: Filepath to a folder where the downloaded data will be saved.
 95        patch_shape: The patch shape to use for training.
 96        split: The split to use for the dataset. Either 'Train' or 'Validation'.
 97        resize_inputs: Whether to resize the inputs.
 98        download: Whether to download the data if it is not present.
 99        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
100
101    Returns:
102        The segmentation dataset.
103    """
104    raw_paths, label_paths = get_nuclick_paths(path, split, download)
105
106    if resize_inputs:
107        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
108        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
109            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
110        )
111
112    return torch_em.default_segmentation_dataset(
113        raw_paths=raw_paths,
114        raw_key=None,
115        label_paths=label_paths,
116        label_key=None,
117        is_seg_dataset=False,
118        with_channels=True,
119        ndim=2,
120        patch_shape=patch_shape,
121        **kwargs
122    )
123
124
def get_nuclick_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["Train", "Validation"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the NuClick dataloader for lymphocyte segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size, forwarded to `torch_em.get_data_loader`.
        patch_shape: The patch shape to use for training.
        split: The split to use for the dataset. Either 'Train' or 'Validation'.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    nuclick_dataset = get_nuclick_dataset(
        path, patch_shape, split, resize_inputs, download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(nuclick_dataset, batch_size, **dataloader_kwargs)
    return loader
# Location of the zipped NuClick IHC data; passed as `url` to `util.download_source`.
URL = 'https://warwick.ac.uk/fac/cross_fac/tia/data/nuclick/ihc_nuclick.zip'
# Expected checksum of the zip file; passed as `checksum` to `util.download_source`.
# NOTE(review): 64 hex characters, presumably a SHA256 digest - confirm against `util`.
CHECKSUM = '5128f1dfcba531e89b49e26364bc667eeb9978fa0039baa25a7f73fdaec2d736'
def get_nuclick_data(path: Union[os.PathLike, str], download: bool = False):
def get_nuclick_data(path: Union[os.PathLike, str], download: bool = False):
    """Make sure the NuClick data is available below `path`.

    If the folder `<path>/IHC_nuclick/IHC` already exists, nothing is downloaded.
    Otherwise `path` is created if needed, the archive is fetched to
    `<path>/ihc_nuclick.zip` via `util.download_source` and unpacked into `path`
    via `util.unzip`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the `IHC_nuclick/IHC` folder inside `path`.
    """
    ihc_root = os.path.join(path, "IHC_nuclick", "IHC")

    if not os.path.exists(ihc_root):
        os.makedirs(path, exist_ok=True)

        archive = os.path.join(path, "ihc_nuclick.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return ihc_root

Download the NuClick dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the dataset is downloaded.

def get_nuclick_paths( path: Union[os.PathLike, str], split: Literal['Train', 'Validation'], download: bool = False) -> Tuple[List[str], List[str]]:
def get_nuclick_paths(
    path: Union[os.PathLike, str], split: Literal["Train", "Validation"], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the NuClick data.

    Images are collected from `images/<split>/*.png` and labels from `masks/<split>/*.npy`
    inside the data folder. For every `.npy` label file a zlib-compressed `.tif` copy is
    written next to it (unless that file already exists), and the `.tif` paths are returned.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use for the dataset. Either 'Train' or 'Validation'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is neither 'Train' nor 'Validation'.
    """
    # Reject unknown splits up front: the split is used as a folder name in the glob
    # patterns below, so a wrong value would otherwise silently yield two empty lists.
    if split not in ("Train", "Validation"):
        raise ValueError(f"'{split}' is not a valid split. Please choose either 'Train' or 'Validation'.")

    data_dir = get_nuclick_data(path, download)

    raw_paths = natsorted(glob(os.path.join(data_dir, "images", split, "*.png")))
    label_paths = natsorted(glob(os.path.join(data_dir, "masks", split, "*.npy")))

    neu_label_paths = []
    for lpath in tqdm(label_paths):
        neu_lpath = Path(lpath).with_suffix(".tif")
        neu_label_paths.append(str(neu_lpath))
        # The conversion is done once; later calls reuse the existing tif file.
        if os.path.exists(neu_lpath):
            continue

        imageio.imwrite(neu_lpath, np.load(lpath), compression="zlib")

    return raw_paths, neu_label_paths

Get paths to the NuClick data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The split to use for the dataset. Either 'Train' or 'Validation'.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_nuclick_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['Train', 'Validation'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 84def get_nuclick_dataset(
 85    path: Union[os.PathLike, str],
 86    patch_shape: Tuple[int, int],
 87    split: Literal["Train", "Validation"],
 88    resize_inputs: bool = False,
 89    download: bool = False,
 90    **kwargs
 91) -> Dataset:
 92    """Get the NuClick dataset for lymphocyte segmentation.
 93
 94    Args:
 95        path: Filepath to a folder where the downloaded data will be saved.
 96        patch_shape: The patch shape to use for training.
 97        split: The split to use for the dataset. Either 'Train' or 'Validation'.
 98        resize_inputs: Whether to resize the inputs.
 99        download: Whether to download the data if it is not present.
100        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
101
102    Returns:
103        The segmentation dataset.
104    """
105    raw_paths, label_paths = get_nuclick_paths(path, split, download)
106
107    if resize_inputs:
108        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
109        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
110            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
111        )
112
113    return torch_em.default_segmentation_dataset(
114        raw_paths=raw_paths,
115        raw_key=None,
116        label_paths=label_paths,
117        label_key=None,
118        is_seg_dataset=False,
119        with_channels=True,
120        ndim=2,
121        patch_shape=patch_shape,
122        **kwargs
123    )

Get the NuClick dataset for lymphocyte segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • split: The split to use for the dataset. Either 'Train' or 'Validation'.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_nuclick_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['Train', 'Validation'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_nuclick_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["Train", "Validation"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the NuClick dataloader for lymphocyte segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size, forwarded to `torch_em.get_data_loader`.
        patch_shape: The patch shape to use for training.
        split: The split to use for the dataset. Either 'Train' or 'Validation'.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    nuclick_dataset = get_nuclick_dataset(
        path, patch_shape, split, resize_inputs, download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(nuclick_dataset, batch_size, **dataloader_kwargs)
    return loader

Get the NuClick dataloader for lymphocyte segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size passed on to the data loader.
  • patch_shape: The patch shape to use for training.
  • split: The split to use for the dataset. Either 'Train' or 'Validation'.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.