torch_em.data.datasets.histopathology.lynsec
The LyNSeC dataset contains annotations for nucleus segmentation in IHC and H&E stained lymphoma tissue images.
The dataset is located at https://doi.org/10.5281/zenodo.8065174. This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2024.107978. Please cite it if you use this dataset in your research.
"""The LyNSeC dataset contains annotations for nucleus segmentation
in IHC and H&E stained lymphoma tissue images.

The dataset is located at https://doi.org/10.5281/zenodo.8065174.
This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2024.107978.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from tqdm import tqdm
from pathlib import Path
from natsort import natsorted
from typing import Union, Tuple, List, Optional, Literal

import numpy as np
import imageio.v3 as imageio

import torch_em

from torch.utils.data import Dataset, DataLoader

from .. import util


URL = "https://zenodo.org/records/8065174/files/lynsec.zip"
CHECKSUM = "14b9b5a9c39cb41afc7f31de5a995cefff0947c215e14ab9c7a463f32fbbf4b6"

# File stems ("0_l2" ... "34_l2") of the H&E images that have mismatching labels.
# Built once at import time so that the per-file membership test is a cheap set lookup.
_MISMATCHED_HE_STEMS = frozenset(f"{i}_l2" for i in range(35))


def _preprocess_dataset(data_dir):
    """Convert the extracted `.npy` files into separate image and label tif files.

    Every folder matching `lynsec*` inside `data_dir` is processed. The folder named
    "lynsec 1" is written to the "ihc" subfolder, every other folder to "h&e".
    For each array the first three channels of the last axis are stored under
    `<target>/images` and the fourth channel under `<target>/labels`.

    Args:
        data_dir: Folder that contains the extracted `lynsec*` folders and that
            receives the "ihc" and "h&e" output folders.
    """
    for source_dir in natsorted(glob(os.path.join(data_dir, "lynsec*"))):
        target_dir = "ihc" if os.path.basename(source_dir) == "lynsec 1" else "h&e"

        image_dir = os.path.join(data_dir, target_dir, "images")
        label_dir = os.path.join(data_dir, target_dir, "labels")
        os.makedirs(image_dir, exist_ok=True)
        os.makedirs(label_dir, exist_ok=True)

        paths = natsorted(glob(os.path.join(source_dir, "*.npy")))
        for fpath in tqdm(paths, desc="Preprocessing inputs"):
            fname = Path(fpath).stem

            # A set of H&E images has mismatching labels. Skip them before loading,
            # so that no array is read from disk only to be discarded.
            if target_dir == "h&e" and fname in _MISMATCHED_HE_STEMS:
                continue

            darray = np.load(fpath)
            raw = darray[..., :3]
            labels = darray[..., 3]

            imageio.imwrite(os.path.join(image_dir, f"{fname}.tif"), raw, compression="zlib")
            imageio.imwrite(os.path.join(label_dir, f"{fname}.tif"), labels, compression="zlib")


def get_lynsec_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the LyNSeC dataset for nucleus segmentation.

    If the folder `<path>/data` already exists it is returned as is, without
    downloading or preprocessing anything.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the downloaded data.
    """
    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        return data_dir

    # Only the parent folder is required for storing the zip file. The data folder
    # itself is created after the download succeeded: otherwise a failing download
    # would leave an empty folder behind, which the check above would mistake
    # for an already prepared dataset on the next call.
    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "lynsec.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)

    os.makedirs(data_dir, exist_ok=True)
    util.unzip(zip_path=zip_path, dst=data_dir)

    _preprocess_dataset(data_dir)

    return data_dir


def get_lynsec_paths(
    path: Union[os.PathLike, str], choice: Optional[Literal['ihc', 'h&e']] = None, download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the LyNSeC data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        choice: The choice of dataset, either 'ihc' or 'h&e'.
            By default (None) the files of both subsets are returned.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the image data.
        List of filepaths to the label data.

    Raises:
        ValueError: If `choice` is neither None nor one of 'ihc' and 'h&e'.
    """
    # Reject unknown choices up-front: they would otherwise only produce a glob
    # pattern without matches and hence two silently empty path lists.
    if choice is not None and choice not in ("ihc", "h&e"):
        raise ValueError(f"Invalid choice {choice!r}. Expected one of 'ihc', 'h&e' or None.")

    data_dir = get_lynsec_data(path, download)

    # The wildcard matches both subset folders.
    subset = "*" if choice is None else choice

    raw_paths = natsorted(glob(os.path.join(data_dir, subset, "images", "*.tif")))
    label_paths = natsorted(glob(os.path.join(data_dir, subset, "labels", "*.tif")))

    return raw_paths, label_paths


def get_lynsec_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    choice: Optional[Literal['ihc', 'h&e']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the LyNSeC dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        choice: The choice of dataset.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_lynsec_paths(path, choice, download)

    if resize_inputs:
        # The images are stored with three channels, so they are resized as RGB inputs.
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )


def get_lynsec_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    choice: Optional[Literal['ihc', 'h&e']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the LyNSeC dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        choice: The choice of dataset.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments understood by the dataset from those meant for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_lynsec_dataset(path, patch_shape, choice, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://zenodo.org/records/8065174/files/lynsec.zip'
CHECKSUM =
'14b9b5a9c39cb41afc7f31de5a995cefff0947c215e14ab9c7a463f32fbbf4b6'
def
get_lynsec_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_lynsec_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the LyNSeC dataset for nucleus segmentation.

    If the folder `<path>/data` already exists it is returned as is, without
    downloading or preprocessing anything.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the downloaded data.
    """
    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        return data_dir

    # Only the parent folder is required for storing the zip file. The data folder
    # itself is created after the download succeeded: otherwise a failing download
    # would leave an empty folder behind, which the check above would mistake
    # for an already prepared dataset on the next call.
    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "lynsec.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)

    os.makedirs(data_dir, exist_ok=True)
    util.unzip(zip_path=zip_path, dst=data_dir)

    _preprocess_dataset(data_dir)

    return data_dir
Download the LyNSeC dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the downloaded data.
def
get_lynsec_paths( path: Union[os.PathLike, str], choice: Optional[Literal['ihc', 'h&e']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_lynsec_paths(
    path: Union[os.PathLike, str], choice: Optional[Literal['ihc', 'h&e']] = None, download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the LyNSeC data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        choice: The choice of dataset, either 'ihc' or 'h&e'.
            By default (None) the files of both subsets are returned.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the image data.
        List of filepaths to the label data.

    Raises:
        ValueError: If `choice` is neither None nor one of 'ihc' and 'h&e'.
    """
    # Reject unknown choices up-front: they would otherwise only produce a glob
    # pattern without matches and hence two silently empty path lists.
    if choice is not None and choice not in ("ihc", "h&e"):
        raise ValueError(f"Invalid choice {choice!r}. Expected one of 'ihc', 'h&e' or None.")

    data_dir = get_lynsec_data(path, download)

    # The wildcard matches both subset folders.
    subset = "*" if choice is None else choice

    raw_paths = natsorted(glob(os.path.join(data_dir, subset, "images", "*.tif")))
    label_paths = natsorted(glob(os.path.join(data_dir, subset, "labels", "*.tif")))

    return raw_paths, label_paths
Get paths to the LyNSeC data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- choice: The choice of dataset.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths to the image data. List of filepaths to the label data.
def
get_lynsec_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], choice: Optional[Literal['ihc', 'h&e']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_lynsec_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    choice: Optional[Literal['ihc', 'h&e']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the segmentation dataset for nucleus segmentation in the LyNSeC data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        choice: The choice of dataset.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, annotation_paths = get_lynsec_paths(path=path, choice=choice, download=download)

    if resize_inputs:
        # The resize settings are built from the requested patch shape and mark the images as RGB.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs=dict(patch_shape=patch_shape, is_rgb=True),
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths, raw_key=None,
        label_paths=annotation_paths, label_key=None,
        patch_shape=patch_shape, is_seg_dataset=False,
        **kwargs
    )
    return dataset
Get the LyNSeC dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- choice: The choice of dataset.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_lynsec_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], choice: Optional[Literal['ihc', 'h&e']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_lynsec_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    choice: Optional[Literal['ihc', 'h&e']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the dataloader for nucleus segmentation in the LyNSeC data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        choice: The choice of dataset.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments understood by the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    lynsec_dataset = get_lynsec_dataset(
        path=path,
        patch_shape=patch_shape,
        choice=choice,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(lynsec_dataset, batch_size, **dataloader_kwargs)
Get the LyNSeC dataloader for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- choice: The choice of dataset.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.