torch_em.data.datasets.histopathology.nuinsseg
The NuInsSeg dataset contains annotations for nucleus segmentation in H&E stained histology images for 31 organs across humans and rats.
The dataset is located at https://www.kaggle.com/datasets/ipateam/nuinsseg. This dataset is from the publication https://doi.org/10.1038/s41597-024-03117-2. Please cite it if you use this dataset for your research.
"""The NuInsSeg dataset contains annotations for nucleus segmentation in
H&E stained histology images for 31 organs across humans and rats.

The dataset is located at https://www.kaggle.com/datasets/ipateam/nuinsseg.
This dataset is from the publication https://doi.org/10.1038/s41597-024-03117-2.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Tuple, Union, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


def get_nuinsseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the NuInsSeg dataset.

    If the folder `<path>/data` already exists it is returned right away and
    neither the download nor the extraction is triggered again.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    util.download_source_kaggle(path=path, dataset_name="ipateam/nuinsseg", download=download)
    util.unzip(zip_path=os.path.join(path, "nuinsseg.zip"), dst=data_dir)

    return data_dir


def get_nuinsseg_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the NuInsSeg data.

    Images are collected from `<tissue folder>/tissue images/*.png` and labels from
    `<tissue folder>/label masks modify/*.tif`. Tissue folders and the files inside
    them are visited in natural sort order, so the result is reproducible.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_nuinsseg_data(path, download)

    # `glob` does not guarantee any ordering (it depends on the file system),
    # so the tissue folders are sorted to keep the sample order deterministic.
    tissue_type_dirs = natsorted(glob(os.path.join(data_dir, "*")))

    raw_paths, label_paths = [], []
    for tissue_dir in tissue_type_dirs:
        raw_paths.extend(natsorted(glob(os.path.join(tissue_dir, "tissue images", "*.png"))))
        label_paths.extend(natsorted(glob(os.path.join(tissue_dir, "label masks modify", "*.tif"))))

    return raw_paths, label_paths


def get_nuinsseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the NuInsSeg dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_nuinsseg_paths(path, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        ndim=2,
        with_channels=True,
        **kwargs
    )


def get_nuinsseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the NuInsSeg dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_nuinsseg_dataset(path, patch_shape, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def
get_nuinsseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_nuinsseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the NuInsSeg data is available below `path`.

    When the folder `<path>/data` already exists it is returned unchanged and
    nothing is downloaded or extracted. Otherwise `path` is created, the Kaggle
    dataset "ipateam/nuinsseg" is requested via `util.download_source_kaggle`
    and the archive `<path>/nuinsseg.zip` is unpacked into `<path>/data`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    extracted_dir = os.path.join(path, "data")

    # Only fetch and unpack when the extracted folder is not there yet.
    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)
        archive_path = os.path.join(path, "nuinsseg.zip")
        util.download_source_kaggle(path=path, dataset_name="ipateam/nuinsseg", download=download)
        util.unzip(zip_path=archive_path, dst=extracted_dir)

    return extracted_dir
Download the NuInsSeg dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def
get_nuinsseg_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
def get_nuinsseg_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the NuInsSeg data.

    Images are collected from `<tissue folder>/tissue images/*.png` and labels from
    `<tissue folder>/label masks modify/*.tif`. Tissue folders and the files inside
    them are visited in natural sort order, so the result is reproducible.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_nuinsseg_data(path, download)

    # `glob` does not guarantee any ordering (it depends on the file system),
    # so the tissue folders are sorted to keep the sample order deterministic.
    tissue_type_dirs = natsorted(glob(os.path.join(data_dir, "*")))

    raw_paths, label_paths = [], []
    for tissue_dir in tissue_type_dirs:
        raw_paths.extend(natsorted(glob(os.path.join(tissue_dir, "tissue images", "*.png"))))
        label_paths.extend(natsorted(glob(os.path.join(tissue_dir, "label masks modify", "*.tif"))))

    return raw_paths, label_paths
Get paths to the NuInsSeg data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_nuinsseg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_nuinsseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the NuInsSeg segmentation dataset for nuclei.

    The image and label paths come from `get_nuinsseg_paths`. The dataset is
    created with `torch_em.default_segmentation_dataset` using `ndim=2`,
    `with_channels=True` and `is_seg_dataset=False`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_files, mask_files = get_nuinsseg_paths(path, download)

    if resize_inputs:
        # The helper hands back updated dataset kwargs together with the patch shape to use.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_files,
        raw_key=None,
        label_paths=mask_files,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        ndim=2,
        with_channels=True,
        **kwargs
    )
    return dataset
Get the NuInsSeg dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_nuinsseg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_nuinsseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the NuInsSeg dataloader for nucleus segmentation.

    The extra keyword arguments are divided by `util.split_kwargs` into those
    for `torch_em.default_segmentation_dataset` and the remaining ones, which
    are forwarded to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    nuinsseg_dataset = get_nuinsseg_dataset(
        path=path,
        patch_shape=patch_shape,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(nuinsseg_dataset, batch_size, **dataloader_kwargs)
Get the NuInsSeg dataloader for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.