torch_em.data.datasets.histopathology.nuclick
The NuClick dataset contains annotations for lymphocytes in IHC histopathology images.
This dataset is located at https://warwick.ac.uk/fac/cross_fac/tia/data/nuclick/. The dataset is from the publication http://www.sciencedirect.com/science/article/pii/S1361841520301353. Please cite it if you use this dataset for your research.
"""The NuClick dataset contains annotations for lymphocytes in IHC histopathology images.

This dataset is located at https://warwick.ac.uk/fac/cross_fac/tia/data/nuclick/.
The dataset is from the publication http://www.sciencedirect.com/science/article/pii/S1361841520301353.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from tqdm import tqdm
from pathlib import Path
from natsort import natsorted
from typing import Tuple, List, Literal, Union

import numpy as np
import imageio.v3 as imageio

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://warwick.ac.uk/fac/cross_fac/tia/data/nuclick/ihc_nuclick.zip"
CHECKSUM = "5128f1dfcba531e89b49e26364bc667eeb9978fa0039baa25a7f73fdaec2d736"


def get_nuclick_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the NuClick dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the dataset is downloaded (the `IHC_nuclick/IHC` folder inside `path`).
    """
    data_dir = os.path.join(path, "IHC_nuclick", "IHC")
    # The extracted folder doubles as the "already downloaded" marker.
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "ihc_nuclick.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def get_nuclick_paths(
    path: Union[os.PathLike, str], split: Literal["Train", "Validation"], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the NuClick data.

    The labels are stored as `.npy` files. On first use each of them is converted to a
    zlib-compressed `.tif` file next to the original; existing `.tif` files are reused.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use for the dataset. Either 'Train' or 'Validation'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is neither 'Train' nor 'Validation'.
    """
    # Fail early: an unknown split would otherwise just produce two empty lists.
    valid_splits = ("Train", "Validation")
    if split not in valid_splits:
        raise ValueError(f"'{split}' is not a valid split. Choose one of {valid_splits}.")

    data_dir = get_nuclick_data(path, download)

    raw_paths = natsorted(glob(os.path.join(data_dir, "images", split, "*.png")))
    label_paths = natsorted(glob(os.path.join(data_dir, "masks", split, "*.npy")))

    neu_label_paths = []
    for lpath in tqdm(label_paths):
        neu_lpath = Path(lpath).with_suffix(".tif")
        neu_label_paths.append(str(neu_lpath))
        # Skip labels that were already converted in a previous call.
        if os.path.exists(neu_lpath):
            continue

        imageio.imwrite(neu_lpath, np.load(lpath), compression="zlib")

    return raw_paths, neu_label_paths


def get_nuclick_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["Train", "Validation"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the NuClick dataset for lymphocyte segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The split to use for the dataset. Either 'Train' or 'Validation'.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_nuclick_paths(path, split, download)

    if resize_inputs:
        # The resize helper may replace both the kwargs and the patch shape.
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=False,
        with_channels=True,
        ndim=2,
        patch_shape=patch_shape,
        **kwargs
    )


def get_nuclick_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["Train", "Validation"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the NuClick dataloader for lymphocyte segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The split to use for the dataset. Either 'Train' or 'Validation'.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Route each keyword argument to the dataset factory or to the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_nuclick_dataset(path, patch_shape, split, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://warwick.ac.uk/fac/cross_fac/tia/data/nuclick/ihc_nuclick.zip'
CHECKSUM =
'5128f1dfcba531e89b49e26364bc667eeb9978fa0039baa25a7f73fdaec2d736'
def
get_nuclick_data(path: Union[os.PathLike, str], download: bool = False):
def get_nuclick_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the NuClick dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the dataset is downloaded (the `IHC_nuclick/IHC` folder inside `path`).
    """
    data_dir = os.path.join(path, "IHC_nuclick", "IHC")
    # The extracted folder doubles as the "already downloaded" marker.
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "ihc_nuclick.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir
Download the NuClick dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the dataset is downloaded.
def
get_nuclick_paths( path: Union[os.PathLike, str], split: Literal['Train', 'Validation'], download: bool = False) -> Tuple[List[str], List[str]]:
def get_nuclick_paths(
    path: Union[os.PathLike, str], split: Literal["Train", "Validation"], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the NuClick data.

    The labels are stored as `.npy` files. On first use each of them is converted to a
    zlib-compressed `.tif` file next to the original; existing `.tif` files are reused.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use for the dataset. Either 'Train' or 'Validation'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is neither 'Train' nor 'Validation'.
    """
    # Fail early: an unknown split would otherwise just produce two empty lists.
    valid_splits = ("Train", "Validation")
    if split not in valid_splits:
        raise ValueError(f"'{split}' is not a valid split. Choose one of {valid_splits}.")

    data_dir = get_nuclick_data(path, download)

    raw_paths = natsorted(glob(os.path.join(data_dir, "images", split, "*.png")))
    label_paths = natsorted(glob(os.path.join(data_dir, "masks", split, "*.npy")))

    neu_label_paths = []
    for lpath in tqdm(label_paths):
        neu_lpath = Path(lpath).with_suffix(".tif")
        neu_label_paths.append(str(neu_lpath))
        # Skip labels that were already converted in a previous call.
        if os.path.exists(neu_lpath):
            continue

        imageio.imwrite(neu_lpath, np.load(lpath), compression="zlib")

    return raw_paths, neu_label_paths
Get paths to the NuClick data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split to use for the dataset. Either 'Train' or 'Validation'.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_nuclick_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['Train', 'Validation'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_nuclick_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["Train", "Validation"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the NuClick dataset for lymphocyte segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The split to use for the dataset. Either 'Train' or 'Validation'.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_nuclick_paths(path, split, download)

    if resize_inputs:
        # The resize helper may replace both the kwargs and the patch shape.
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=False,
        with_channels=True,
        ndim=2,
        patch_shape=patch_shape,
        **kwargs
    )
Get the NuClick dataset for lymphocyte segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- split: The split to use for the dataset. Either 'Train' or 'Validation'.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_nuclick_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['Train', 'Validation'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_nuclick_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["Train", "Validation"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the NuClick dataloader for lymphocyte segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The split to use for the dataset. Either 'Train' or 'Validation'.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Route each keyword argument to the dataset factory or to the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_nuclick_dataset(path, patch_shape, split, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
Get the NuClick dataloader for lymphocyte segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The split to use for the dataset. Either 'Train' or 'Validation'.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.