torch_em.data.datasets.histopathology.lynsec

The LyNSeC dataset contains annotations for nucleus segmentation in IHC and H&E stained lymphoma tissue images.

The dataset is located at https://doi.org/10.5281/zenodo.8065174. This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2024.107978. Please cite it if you use this dataset in your research.

  1"""The LyNSeC dataset contains annotations for nucleus segmentation
  2in IHC and H&E stained lymphoma tissue images.
  3
  4The dataset is located at https://doi.org/10.5281/zenodo.8065174.
  5This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2024.107978.
  6Please cite it if you use this dataset in your research.
  7"""
  8
  9import os
 10from glob import glob
 11from tqdm import tqdm
 12from pathlib import Path
 13from natsort import natsorted
 14from typing import Union, Tuple, List, Optional, Literal
 15
 16import numpy as np
 17import imageio.v3 as imageio
 18
 19import torch_em
 20
 21from torch.utils.data import Dataset, DataLoader
 22
 23from .. import util
 24
 25
 26URL = "https://zenodo.org/records/8065174/files/lynsec.zip"
 27CHECKSUM = "14b9b5a9c39cb41afc7f31de5a995cefff0947c215e14ab9c7a463f32fbbf4b6"
 28
 29
 30def _preprocess_dataset(data_dir):
 31    data_dirs = natsorted(glob(os.path.join(data_dir, "lynsec*")))
 32    for _dir in data_dirs:
 33        if os.path.basename(_dir) == "lynsec 1":
 34            target_dir = "ihc"
 35        else:
 36            target_dir = "h&e"
 37
 38        image_dir = os.path.join(data_dir, target_dir, "images")
 39        label_dir = os.path.join(data_dir, target_dir, "labels")
 40        os.makedirs(image_dir, exist_ok=True)
 41        os.makedirs(label_dir, exist_ok=True)
 42
 43        paths = natsorted(glob(os.path.join(_dir, "*.npy")))
 44        for fpath in tqdm(paths, desc="Preprocessing inputs"):
 45            fname = Path(fpath).stem
 46            darray = np.load(fpath)
 47
 48            raw = darray[..., :3]
 49            labels = darray[..., 3]
 50
 51            if target_dir == "h&e" and fname in [f"{i}_l2" for i in range(35)]:  # set of images have mismatching labels
 52                continue
 53
 54            imageio.imwrite(os.path.join(image_dir, f"{fname}.tif"), raw, compression="zlib")
 55            imageio.imwrite(os.path.join(label_dir, f"{fname}.tif"), labels, compression="zlib")
 56
 57
 58def get_lynsec_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 59    """Download the LyNSeC dataset for nucleus segmentation.
 60
 61    Args:
 62        path: Filepath to a folder where the downloaded data will be saved.
 63        download: Whether to download the data if it is not present.
 64
 65    Returns:
 66        The filepath to the downloaded data.
 67    """
 68    data_dir = os.path.join(path, "data")
 69    if os.path.exists(data_dir):
 70        return data_dir
 71
 72    os.makedirs(data_dir, exist_ok=True)
 73
 74    zip_path = os.path.join(path, "lynsec.zip")
 75    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 76    util.unzip(zip_path=zip_path, dst=data_dir)
 77
 78    _preprocess_dataset(data_dir)
 79
 80    return data_dir
 81
 82
 83def get_lynsec_paths(
 84    path: Union[os.PathLike, str], choice: Optional[Literal['ihc', 'h&e']] = None, download: bool = False
 85) -> Tuple[List[str], List[str]]:
 86    """Get paths to the LyNSec data.
 87
 88    Args:
 89        path: Filepath to a folder where the downloaded data will be saved.
 90        choice: The choice of dataset.
 91        download: Whether to download the data if it is not present.
 92
 93    Returns:
 94        List of filepaths to the image data.
 95        List of filepaths to the label data.
 96    """
 97    data_dir = get_lynsec_data(path, download)
 98
 99    if choice is None:
100        choice = "*"
101
102    raw_paths = natsorted(glob(os.path.join(data_dir, choice, "images", "*.tif")))
103    label_paths = natsorted(glob(os.path.join(data_dir, choice, "labels", "*.tif")))
104
105    return raw_paths, label_paths
106
107
def get_lynsec_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    choice: Optional[Literal['ihc', 'h&e']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the LyNSeC dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        choice: The choice of dataset, either 'ihc' or 'h&e'. By default, both are used.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, mask_paths = get_lynsec_paths(path=path, choice=choice, download=download)

    if resize_inputs:
        # The resize settings use the patch shape that was requested by the caller.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=mask_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset
146
147
def get_lynsec_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    choice: Optional[Literal['ihc', 'h&e']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the LyNSeC dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        choice: The choice of dataset, either 'ihc' or 'h&e'. By default, both are used.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the ones for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    lynsec_dataset = get_lynsec_dataset(
        path=path,
        patch_shape=patch_shape,
        choice=choice,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(lynsec_dataset, batch_size, **dataloader_kwargs)
URL = 'https://zenodo.org/records/8065174/files/lynsec.zip'
CHECKSUM = '14b9b5a9c39cb41afc7f31de5a995cefff0947c215e14ab9c7a463f32fbbf4b6'
def get_lynsec_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_lynsec_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the LyNSeC dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the downloaded data.
    """
    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        return data_dir

    # Only the parent folder is created up front, because the zip file is stored there.
    # The data folder itself is created after the download returns. Otherwise an exception raised
    # while downloading would leave an empty folder behind, and the check above would treat it
    # as already downloaded data on the next call.
    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "lynsec.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)

    os.makedirs(data_dir, exist_ok=True)
    util.unzip(zip_path=zip_path, dst=data_dir)

    _preprocess_dataset(data_dir)

    return data_dir

Download the LyNSeC dataset for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the downloaded data.

def get_lynsec_paths( path: Union[os.PathLike, str], choice: Optional[Literal['ihc', 'h&e']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
 84def get_lynsec_paths(
 85    path: Union[os.PathLike, str], choice: Optional[Literal['ihc', 'h&e']] = None, download: bool = False
 86) -> Tuple[List[str], List[str]]:
 87    """Get paths to the LyNSec data.
 88
 89    Args:
 90        path: Filepath to a folder where the downloaded data will be saved.
 91        choice: The choice of dataset.
 92        download: Whether to download the data if it is not present.
 93
 94    Returns:
 95        List of filepaths to the image data.
 96        List of filepaths to the label data.
 97    """
 98    data_dir = get_lynsec_data(path, download)
 99
100    if choice is None:
101        choice = "*"
102
103    raw_paths = natsorted(glob(os.path.join(data_dir, choice, "images", "*.tif")))
104    label_paths = natsorted(glob(os.path.join(data_dir, choice, "labels", "*.tif")))
105
106    return raw_paths, label_paths

Get paths to the LyNSeC data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • choice: The choice of dataset, either 'ihc' or 'h&e'. By default, the data for both stainings is used.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths to the image data. List of filepaths to the label data.

def get_lynsec_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], choice: Optional[Literal['ihc', 'h&e']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_lynsec_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    choice: Optional[Literal['ihc', 'h&e']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the LyNSeC dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        choice: The choice of dataset, either 'ihc' or 'h&e'. By default, both are used.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, mask_paths = get_lynsec_paths(path=path, choice=choice, download=download)

    if resize_inputs:
        # The resize settings use the patch shape that was requested by the caller.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=mask_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset

Get the LyNSeC dataset for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • choice: The choice of dataset, either 'ihc' or 'h&e'. By default, the data for both stainings is used.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_lynsec_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], choice: Optional[Literal['ihc', 'h&e']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_lynsec_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    choice: Optional[Literal['ihc', 'h&e']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the LyNSeC dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        choice: The choice of dataset, either 'ihc' or 'h&e'. By default, both are used.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the ones for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    lynsec_dataset = get_lynsec_dataset(
        path=path,
        patch_shape=patch_shape,
        choice=choice,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(lynsec_dataset, batch_size, **dataloader_kwargs)

Get the LyNSeC dataloader for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • choice: The choice of dataset, either 'ihc' or 'h&e'. By default, the data for both stainings is used.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.