torch_em.data.datasets.histopathology.segpath

SegPath contains semantic segmentation masks for H&E stained histopathology images from diverse cancer tissues.

The dataset is located at https://dakomura.github.io/SegPath/ (across several Zenodo links). The dataset is from the publication https://doi.org/10.1016/j.patter.2023.100688. Please cite it if you use this dataset for your research.

View Source

  1"""SegPath contains semantic segmentation masks for H&E stained histopathology images from diverse cancer tissues.
  2
  3The dataset is located at https://dakomura.github.io/SegPath/ (across several Zenodo links).
  4The dataset is from the publication https://doi.org/10.1016/j.patter.2023.100688.
  5Please cite it if you use this dataset for your research.
  6"""
  7
  8import csv
  9import gzip
 10import os
 11import tarfile
 12from multiprocessing import Pool, cpu_count
 13from pathlib import Path
 14from shutil import rmtree
 15from typing import List, Literal, Optional, Tuple, Union
 16
 17import h5py
 18import imageio.v3 as imageio
 19from tqdm import tqdm
 20
 21import torch
 22from torch.utils.data import Dataset, DataLoader
 23
 24import torch_em
 25
 26from .. import util
 27
 28
 29URLS = {
 30    "epithelium": {
 31        "data": "https://zenodo.org/api/records/7412731/files/panCK_Epithelium.tar.gz/content",
 32        "metadata": "https://zenodo.org/api/records/7412731/files/panCK_fileinfo.csv/content",
 33        "data_name": "panCK_Epithelium.tar.gz",
 34        "metadata_name": "panCK_fileinfo.csv",
 35    },
 36    "smooth_muscle": {
 37        "data": "https://zenodo.org/api/records/7412732/files/aSMA_SmoothMuscle.tar.gz/content",
 38        "metadata": "https://zenodo.org/api/records/7412732/files/aSMA_fileinfo.csv/content",
 39        "data_name": "aSMA_SmoothMuscle.tar.gz",
 40        "metadata_name": "aSMA_fileinfo.csv",
 41    },
 42    "red_blood_cells": {
 43        "data": "https://zenodo.org/api/records/7412580/files/CD235a_RBC.tar.gz/content",
 44        "metadata": "https://zenodo.org/api/records/7412580/files/CD235a_fileinfo.csv/content",
 45        "data_name": "CD235a_RBC.tar.gz",
 46        "metadata_name": "CD235a_fileinfo.csv",
 47    },
 48    "leukocytes": {
 49        "data": "https://zenodo.org/api/records/7412739/files/CD45RB_Leukocyte.tar.gz/content",
 50        "metadata": "https://zenodo.org/api/records/7412739/files/CD45RB_fileinfo.csv/content",
 51        "data_name": "CD45RB_Leukocyte.tar.gz",
 52        "metadata_name": "CD45RB_fileinfo.csv",
 53    },
 54    "lymphocytes": {
 55        "data": "https://zenodo.org/api/records/7412529/files/CD3CD20_Lymphocyte.tar.gz/content",
 56        "metadata": "https://zenodo.org/api/records/7412529/files/CD3CD20_fileinfo.csv/content",
 57        "data_name": "CD3CD20_Lymphocyte.tar.gz",
 58        "metadata_name": "CD3CD20_fileinfo.csv",
 59    },
 60    "endothelium": {
 61        "data": "https://zenodo.org/api/records/7412512/files/ERG_Endothelium.tar.gz/content",
 62        "metadata": "https://zenodo.org/api/records/7412512/files/ERG_fileinfo.csv/content",
 63        "data_name": "ERG_Endothelium.tar.gz",
 64        "metadata_name": "ERG_fileinfo.csv",
 65    },
 66    "plasma_cells": {
 67        "data": "https://zenodo.org/api/records/7412500/files/MIST1_PlasmaCell.tar.gz/content",
 68        "metadata": "https://zenodo.org/api/records/7412500/files/MIST1_fileinfo.csv/content",
 69        "data_name": "MIST1_PlasmaCell.tar.gz",
 70        "metadata_name": "MIST1_fileinfo.csv",
 71    },
 72    "myeloid_cells": {
 73        "data": "https://zenodo.org/api/records/7412690/files/MNDA_MyeloidCell.tar.gz/content",
 74        "metadata": "https://zenodo.org/api/records/7412690/files/MNDA_fileinfo.csv/content",
 75        "data_name": "MNDA_MyeloidCell.tar.gz",
 76        "metadata_name": "MNDA_fileinfo.csv",
 77    },
 78}
 79
 80
 81def _get_data_name(cell_type):
 82    return URLS[cell_type]["data_name"].split(".")[0]
 83
 84
 85def _to_cell_types(cell_types):
 86    if cell_types is None:
 87        return list(URLS)
 88    if isinstance(cell_types, str):
 89        cell_types = [cell_types]
 90    invalid_cell_types = set(cell_types) - set(URLS)
 91    if invalid_cell_types:
 92        raise ValueError(
 93            f"Invalid cell type choices: {sorted(invalid_cell_types)}. Choose from {sorted(URLS)}."
 94        )
 95    return cell_types
 96
 97
 98def _is_gzip(path):
 99    with open(path, "rb") as f:
100        return f.read(2) == b"\x1f\x8b"
101
102
def _save_as_h5(sample_path):
    """Store one image / mask pair in a single gzip-compressed H5 file.

    The image is written channel-first to "images/raw" and the mask to "labels/mask".

    Args:
        sample_path: Tuple of (image path, mask path, output H5 path).
    """
    image_file, mask_file, out_file = sample_path
    raw = imageio.imread(image_file)
    labels = imageio.imread(mask_file)

    # Keep only the first three channels of 4-channel images (the last one is presumably alpha).
    if raw.ndim == 3 and raw.shape[-1] == 4:
        raw = raw[..., :3]

    # Move the channel axis to the front: (H, W, C) -> (C, H, W).
    raw = raw.transpose(2, 0, 1)

    # Chunks cover at most 256 x 256 pixels and never exceed the image extent.
    height, width = raw.shape[1:]
    spatial_chunks = (min(256, height), min(256, width))

    with h5py.File(out_file, "w") as f:
        f.create_dataset(name="images/raw", data=raw, compression="gzip", chunks=(1, *spatial_chunks))
        f.create_dataset(name="labels/mask", data=labels, compression="gzip", chunks=spatial_chunks)
115
116
def _extract_data(tar_path, extract_path):
    """Extract a SegPath archive and convert its image / mask pairs to H5 files.

    The archive is unpacked into `<tar_path.parent>/unprocessed` and then deleted. Every `*_HE.png`
    image found in `unprocessed/<extract_path.name>` is stored together with its `*_mask.png` mask
    in one H5 file. The H5 files are first written to `<extract_path>/data.partial` and the folder
    is renamed to `<extract_path>/data` only after all files were converted, so that the existence
    of `data` can be relied upon as the marker for a complete conversion.

    Args:
        tar_path: Path to the downloaded tar archive (a `pathlib.Path`).
        extract_path: Folder of the cell type (a `pathlib.Path`); the H5 files end up in its `data` subfolder.

    Raises:
        RuntimeError: If an archive member would be extracted outside of the extraction folder,
            or if the archive does not contain any `*_HE.png` images in the expected folder.
        tarfile.FilterError: If the "data" extraction filter rejects a member
            (only on Python versions that provide tarfile extraction filters).
    """
    extract_root = tar_path.parent.resolve() / "unprocessed"

    with tarfile.open(tar_path) as f:
        for member in tqdm(f.getmembers(), desc="Extracting data"):
            member_path = (extract_root / member.name).resolve()
            try:
                member_path.relative_to(extract_root)
            except ValueError:
                raise RuntimeError(f"Unsafe path in tar archive: {member.name}") from None

        # The name check above cannot catch link members whose target lies outside of the
        # extraction folder. The "data" extraction filter rejects those; it is enabled only
        # if this Python version ships tarfile extraction filters.
        extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
        f.extractall(extract_root, **extract_kwargs)

    # The archive is removed right after unpacking, i.e. before the conversion starts.
    tar_path.unlink()

    image_dir = extract_root / extract_path.name
    image_paths = sorted(image_dir.glob("*_HE.png"))
    if not image_paths:
        raise RuntimeError(f"Could not find any '*_HE.png' images in '{image_dir}' after extraction.")

    # Convert into a staging folder first. Creating the final folder up front would make an
    # interrupted conversion look like a complete dataset on the next run.
    h5_dir = extract_path / "data"
    staging_dir = extract_path / "data.partial"
    if staging_dir.exists():
        rmtree(staging_dir)  # left over from an earlier, interrupted conversion
    staging_dir.mkdir(parents=True)

    sample_paths = [
        (
            image_path,
            image_path.with_name(image_path.name.replace("_HE.png", "_mask.png")),
            staging_dir / image_path.with_suffix(".h5").name,
        )
        for image_path in image_paths
    ]

    # Leave one core free, but always use at least one worker.
    with Pool(max(1, cpu_count() - 1)) as pool:
        list(tqdm(
            pool.imap_unordered(_save_as_h5, sample_paths),
            total=len(sample_paths),
            desc="Saving to H5"
        ))

    # Publish the converted files. Everything was regenerated from the archive,
    # so the content of an already existing folder is replaced.
    if h5_dir.exists():
        rmtree(h5_dir)
    staging_dir.rename(h5_dir)

    rmtree(extract_root)
146
147
def get_segpath_data(
    path: Union[os.PathLike, str],
    cell_types: Optional[Union[str, List[str]]] = None,
    download: bool = False,
) -> None:
    """Download and preprocess the SegPath data.

    The target folder is always created. Without `download=True` nothing else happens.
    Otherwise, for each cell type the metadata CSV is fetched, and the image archive is
    fetched and converted to H5 files unless the `data` folder of that cell type already exists.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        cell_types: The cell types to download. By default all cell types are downloaded.
        download: Whether to download the data if it is not present.
    """
    root = Path(path)
    root.mkdir(parents=True, exist_ok=True)

    if not download:
        return

    for name in _to_cell_types(cell_types):
        links = URLS[name]
        target_dir = root / _get_data_name(name)

        util.download_source(root / links["metadata_name"], links["metadata"], download, checksum=None)

        # An existing "data" folder means this cell type was already converted.
        if (target_dir / "data").exists():
            continue

        archive = root / links["data_name"]
        util.download_source(archive, links["data"], download, checksum=None)
        _extract_data(archive, target_dir)
176
177
def _get_paths_from_metadata(path, cell_type, split):
    """Select the H5 files of one cell type with the help of its metadata CSV.

    A row of the CSV is used if its "train_val_test" value matches `split` (any row for `split=None`),
    its "filename" ends with "_HE.png" and the corresponding H5 file exists in the `data` folder.
    The CSV may be stored gzip-compressed.

    Args:
        path: The root folder of the dataset (a `pathlib.Path`).
        cell_type: The cell type, a key of `URLS`.
        split: The split to select or None to select all rows.

    Returns:
        List of paths to the existing H5 files, in the order of the CSV rows.
    """
    metadata_file = path / URLS[cell_type]["metadata_name"]
    h5_dir = path / _get_data_name(cell_type) / "data"

    def _candidates(rows):
        # Yields the expected H5 path of every image row that belongs to the requested split.
        for row in rows:
            if not (split is None or row["train_val_test"] == split):
                continue
            image_name = row["filename"]
            if image_name.endswith("_HE.png"):
                yield h5_dir / Path(image_name).name.replace(".png", ".h5")

    opener = gzip.open if _is_gzip(metadata_file) else open
    with opener(metadata_file, mode="rt") as f:
        return [h5_path for h5_path in _candidates(csv.DictReader(f)) if h5_path.exists()]
203
204
def _get_paths_from_files(path, cell_type, split):
    """List all H5 files of one cell type without consulting the metadata CSV.

    Args:
        path: The root folder of the dataset (a `pathlib.Path`).
        cell_type: The cell type, a key of `URLS`.
        split: Must be None, because the split assignment is only available in the metadata.

    Returns:
        Sorted list of the H5 files in the `data` folder of the cell type.

    Raises:
        RuntimeError: If a split is requested.
    """
    if split is None:
        h5_dir = path / _get_data_name(cell_type) / "data"
        return sorted(h5_dir.glob("*.h5"))

    raise RuntimeError(
        "The SegPath metadata CSV is required for split selection, but it could not be found. "
        "Please download the metadata with `download=True` or place it into the dataset folder."
    )
215
216
def get_segpath_paths(
    path: Union[os.PathLike, str],
    cell_types: Optional[Union[str, List[str]]] = None,
    split: Optional[Literal["train", "val", "test"]] = None,
    download: bool = False,
) -> List[str]:
    """Collect the filepaths of the preprocessed SegPath data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        cell_types: The cell types to use. By default all cell types are used.
        split: The split to use. Either "train", "val", "test" or None for all images.
        download: Whether to download the data if it is not present.

    Returns:
        Sorted list of filepaths to the preprocessed H5 files.

    Raises:
        ValueError: If `split` or one of the cell types is invalid.
        RuntimeError: If no H5 file matches the requested settings.
    """
    root = Path(path)

    valid_splits = ("train", "val", "test")
    if split is not None and split not in valid_splits:
        raise ValueError(f"'{split}' is not a valid split choice.")

    selected_types = _to_cell_types(cell_types)
    get_segpath_data(root, selected_types, download)

    collected = []
    for cell_type in selected_types:
        # Prefer the metadata CSV; fall back to listing the files if it is not available.
        has_metadata = (root / URLS[cell_type]["metadata_name"]).exists()
        lookup = _get_paths_from_metadata if has_metadata else _get_paths_from_files
        collected += lookup(root, cell_type, split)

    if not collected:
        raise RuntimeError("Could not find any SegPath images and masks for the requested settings.")

    return sorted(map(str, collected))
255
256
def get_segpath_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    cell_types: Optional[Union[str, List[str]]] = None,
    split: Optional[Literal["train", "val", "test"]] = None,
    download: bool = False,
    label_dtype: torch.dtype = torch.int64,
    resize_inputs: bool = False,
    **kwargs
) -> Dataset:
    """Create the SegPath dataset for semantic segmentation in H&E stained histopathology images.

    Images are read from the key "images/raw" and masks from the key "labels/mask" of the same H5 files.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        cell_types: The cell types to use. By default all cell types are used.
        split: The split to use. Either "train", "val", "test" or None for all images.
        download: Whether to download the data if it is not present.
        label_dtype: The datatype of labels.
        resize_inputs: Whether to resize the input images.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    h5_paths = get_segpath_paths(path, cell_types, split, download)

    if resize_inputs:
        # The helper returns the updated keyword arguments and the patch shape to use with them.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    # Raw data and labels live in the same files, hence the same path list is passed twice.
    return torch_em.default_segmentation_dataset(
        raw_paths=h5_paths,
        raw_key="images/raw",
        label_paths=h5_paths,
        label_key="labels/mask",
        patch_shape=patch_shape,
        ndim=2,
        with_channels=True,
        is_seg_dataset=True,
        label_dtype=label_dtype,
        **kwargs
    )
302
303
def get_segpath_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    cell_types: Optional[Union[str, List[str]]] = None,
    split: Optional[Literal["train", "val", "test"]] = None,
    download: bool = False,
    label_dtype: torch.dtype = torch.int64,
    resize_inputs: bool = False,
    **kwargs
) -> DataLoader:
    """Create the SegPath dataloader.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        cell_types: The cell types to use. By default all cell types are used.
        split: The split to use. Either "train", "val", "test" or None for all images.
        download: Whether to download the data if it is not present.
        label_dtype: The datatype of labels.
        resize_inputs: Whether to resize the input images.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments of the dataset from the ones that are passed on to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    segpath_dataset = get_segpath_dataset(
        path=path,
        patch_shape=patch_shape,
        cell_types=cell_types,
        split=split,
        download=download,
        label_dtype=label_dtype,
        resize_inputs=resize_inputs,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(segpath_dataset, batch_size, **dataloader_kwargs)

URLS = {'epithelium': {'data': 'https://zenodo.org/api/records/7412731/files/panCK_Epithelium.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412731/files/panCK_fileinfo.csv/content', 'data_name': 'panCK_Epithelium.tar.gz', 'metadata_name': 'panCK_fileinfo.csv'}, 'smooth_muscle': {'data': 'https://zenodo.org/api/records/7412732/files/aSMA_SmoothMuscle.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412732/files/aSMA_fileinfo.csv/content', 'data_name': 'aSMA_SmoothMuscle.tar.gz', 'metadata_name': 'aSMA_fileinfo.csv'}, 'red_blood_cells': {'data': 'https://zenodo.org/api/records/7412580/files/CD235a_RBC.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412580/files/CD235a_fileinfo.csv/content', 'data_name': 'CD235a_RBC.tar.gz', 'metadata_name': 'CD235a_fileinfo.csv'}, 'leukocytes': {'data': 'https://zenodo.org/api/records/7412739/files/CD45RB_Leukocyte.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412739/files/CD45RB_fileinfo.csv/content', 'data_name': 'CD45RB_Leukocyte.tar.gz', 'metadata_name': 'CD45RB_fileinfo.csv'}, 'lymphocytes': {'data': 'https://zenodo.org/api/records/7412529/files/CD3CD20_Lymphocyte.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412529/files/CD3CD20_fileinfo.csv/content', 'data_name': 'CD3CD20_Lymphocyte.tar.gz', 'metadata_name': 'CD3CD20_fileinfo.csv'}, 'endothelium': {'data': 'https://zenodo.org/api/records/7412512/files/ERG_Endothelium.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412512/files/ERG_fileinfo.csv/content', 'data_name': 'ERG_Endothelium.tar.gz', 'metadata_name': 'ERG_fileinfo.csv'}, 'plasma_cells': {'data': 'https://zenodo.org/api/records/7412500/files/MIST1_PlasmaCell.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412500/files/MIST1_fileinfo.csv/content', 'data_name': 'MIST1_PlasmaCell.tar.gz', 'metadata_name': 'MIST1_fileinfo.csv'}, 'myeloid_cells': {'data': 
'https://zenodo.org/api/records/7412690/files/MNDA_MyeloidCell.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412690/files/MNDA_fileinfo.csv/content', 'data_name': 'MNDA_MyeloidCell.tar.gz', 'metadata_name': 'MNDA_fileinfo.csv'}}

def get_segpath_data( path: Union[os.PathLike, str], cell_types: Union[List[str], str, NoneType] = None, download: bool = False) -> None: View Source

def get_segpath_data(
    path: Union[os.PathLike, str],
    cell_types: Optional[Union[str, List[str]]] = None,
    download: bool = False,
) -> None:
    """Download and preprocess the SegPath data.

    The target folder is always created. Without `download=True` nothing else happens.
    Otherwise, for each cell type the metadata CSV is fetched, and the image archive is
    fetched and converted to H5 files unless the `data` folder of that cell type already exists.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        cell_types: The cell types to download. By default all cell types are downloaded.
        download: Whether to download the data if it is not present.
    """
    root = Path(path)
    root.mkdir(parents=True, exist_ok=True)

    if not download:
        return

    for name in _to_cell_types(cell_types):
        links = URLS[name]
        target_dir = root / _get_data_name(name)

        util.download_source(root / links["metadata_name"], links["metadata"], download, checksum=None)

        # An existing "data" folder means this cell type was already converted.
        if (target_dir / "data").exists():
            continue

        archive = root / links["data_name"]
        util.download_source(archive, links["data"], download, checksum=None)
        _extract_data(archive, target_dir)

Download the SegPath data.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
cell_types: The cell types to download. By default all cell types are downloaded.
download: Whether to download the data if it is not present.

def get_segpath_paths( path: Union[os.PathLike, str], cell_types: Union[List[str], str, NoneType] = None, split: Optional[Literal['train', 'val', 'test']] = None, download: bool = False) -> List[str]: View Source

def get_segpath_paths(
    path: Union[os.PathLike, str],
    cell_types: Optional[Union[str, List[str]]] = None,
    split: Optional[Literal["train", "val", "test"]] = None,
    download: bool = False,
) -> List[str]:
    """Collect the filepaths of the preprocessed SegPath data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        cell_types: The cell types to use. By default all cell types are used.
        split: The split to use. Either "train", "val", "test" or None for all images.
        download: Whether to download the data if it is not present.

    Returns:
        Sorted list of filepaths to the preprocessed H5 files.

    Raises:
        ValueError: If `split` or one of the cell types is invalid.
        RuntimeError: If no H5 file matches the requested settings.
    """
    root = Path(path)

    valid_splits = ("train", "val", "test")
    if split is not None and split not in valid_splits:
        raise ValueError(f"'{split}' is not a valid split choice.")

    selected_types = _to_cell_types(cell_types)
    get_segpath_data(root, selected_types, download)

    collected = []
    for cell_type in selected_types:
        # Prefer the metadata CSV; fall back to listing the files if it is not available.
        has_metadata = (root / URLS[cell_type]["metadata_name"]).exists()
        lookup = _get_paths_from_metadata if has_metadata else _get_paths_from_files
        collected += lookup(root, cell_type, split)

    if not collected:
        raise RuntimeError("Could not find any SegPath images and masks for the requested settings.")

    return sorted(map(str, collected))

Get paths to the SegPath data.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
cell_types: The cell types to use. By default all cell types are used.
split: The split to use. Either "train", "val", "test" or None for all images.
download: Whether to download the data if it is not present.

Returns:

List of filepaths to the preprocessed H5 files.

def get_segpath_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], cell_types: Union[List[str], str, NoneType] = None, split: Optional[Literal['train', 'val', 'test']] = None, download: bool = False, label_dtype: torch.dtype = torch.int64, resize_inputs: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

def get_segpath_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    cell_types: Optional[Union[str, List[str]]] = None,
    split: Optional[Literal["train", "val", "test"]] = None,
    download: bool = False,
    label_dtype: torch.dtype = torch.int64,
    resize_inputs: bool = False,
    **kwargs
) -> Dataset:
    """Create the SegPath dataset for semantic segmentation in H&E stained histopathology images.

    Images are read from the key "images/raw" and masks from the key "labels/mask" of the same H5 files.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        cell_types: The cell types to use. By default all cell types are used.
        split: The split to use. Either "train", "val", "test" or None for all images.
        download: Whether to download the data if it is not present.
        label_dtype: The datatype of labels.
        resize_inputs: Whether to resize the input images.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    h5_paths = get_segpath_paths(path, cell_types, split, download)

    if resize_inputs:
        # The helper returns the updated keyword arguments and the patch shape to use with them.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    # Raw data and labels live in the same files, hence the same path list is passed twice.
    return torch_em.default_segmentation_dataset(
        raw_paths=h5_paths,
        raw_key="images/raw",
        label_paths=h5_paths,
        label_key="labels/mask",
        patch_shape=patch_shape,
        ndim=2,
        with_channels=True,
        is_seg_dataset=True,
        label_dtype=label_dtype,
        **kwargs
    )

Get the SegPath dataset for semantic segmentation in H&E stained histopathology images.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
patch_shape: The patch shape to use for training.
cell_types: The cell types to use. By default all cell types are used.
split: The split to use. Either "train", "val", "test" or None for all images.
download: Whether to download the data if it is not present.
label_dtype: The datatype of labels.
resize_inputs: Whether to resize the input images.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_segpath_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, cell_types: Union[List[str], str, NoneType] = None, split: Optional[Literal['train', 'val', 'test']] = None, download: bool = False, label_dtype: torch.dtype = torch.int64, resize_inputs: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_segpath_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    cell_types: Optional[Union[str, List[str]]] = None,
    split: Optional[Literal["train", "val", "test"]] = None,
    download: bool = False,
    label_dtype: torch.dtype = torch.int64,
    resize_inputs: bool = False,
    **kwargs
) -> DataLoader:
    """Create the SegPath dataloader.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        cell_types: The cell types to use. By default all cell types are used.
        split: The split to use. Either "train", "val", "test" or None for all images.
        download: Whether to download the data if it is not present.
        label_dtype: The datatype of labels.
        resize_inputs: Whether to resize the input images.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments of the dataset from the ones that are passed on to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    segpath_dataset = get_segpath_dataset(
        path=path,
        patch_shape=patch_shape,
        cell_types=cell_types,
        split=split,
        download=download,
        label_dtype=label_dtype,
        resize_inputs=resize_inputs,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(segpath_dataset, batch_size, **dataloader_kwargs)

Get the SegPath dataloader.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
patch_shape: The patch shape to use for training.
batch_size: The batch size for training.
cell_types: The cell types to use. By default all cell types are used.
split: The split to use. Either "train", "val", "test" or None for all images.
download: Whether to download the data if it is not present.
label_dtype: The datatype of labels.
resize_inputs: Whether to resize the input images.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.