torch_em.data.datasets.histopathology.segpath
SegPath contains semantic segmentation masks for H&E stained histopathology images from diverse cancer tissues.
The dataset is located at https://dakomura.github.io/SegPath/ (across several Zenodo links). The dataset is from the publication https://doi.org/10.1016/j.patter.2023.100688. Please cite it if you use this dataset for your research.
"""SegPath contains semantic segmentation masks for H&E stained histopathology images from diverse cancer tissues.

The dataset is located at https://dakomura.github.io/SegPath/ (across several Zenodo links).
The dataset is from the publication https://doi.org/10.1016/j.patter.2023.100688.
Please cite it if you use this dataset for your research.
"""

import csv
import gzip
import os
import tarfile
from multiprocessing import Pool, cpu_count
from pathlib import Path
from shutil import rmtree
from typing import List, Literal, Optional, Tuple, Union

import h5py
import imageio.v3 as imageio
from tqdm import tqdm

import torch
from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


# Zenodo download links per cell type: the image/mask tarball, the metadata CSV
# (which carries the train/val/test split assignment), and their local filenames.
URLS = {
    "epithelium": {
        "data": "https://zenodo.org/api/records/7412731/files/panCK_Epithelium.tar.gz/content",
        "metadata": "https://zenodo.org/api/records/7412731/files/panCK_fileinfo.csv/content",
        "data_name": "panCK_Epithelium.tar.gz",
        "metadata_name": "panCK_fileinfo.csv",
    },
    "smooth_muscle": {
        "data": "https://zenodo.org/api/records/7412732/files/aSMA_SmoothMuscle.tar.gz/content",
        "metadata": "https://zenodo.org/api/records/7412732/files/aSMA_fileinfo.csv/content",
        "data_name": "aSMA_SmoothMuscle.tar.gz",
        "metadata_name": "aSMA_fileinfo.csv",
    },
    "red_blood_cells": {
        "data": "https://zenodo.org/api/records/7412580/files/CD235a_RBC.tar.gz/content",
        "metadata": "https://zenodo.org/api/records/7412580/files/CD235a_fileinfo.csv/content",
        "data_name": "CD235a_RBC.tar.gz",
        "metadata_name": "CD235a_fileinfo.csv",
    },
    "leukocytes": {
        "data": "https://zenodo.org/api/records/7412739/files/CD45RB_Leukocyte.tar.gz/content",
        "metadata": "https://zenodo.org/api/records/7412739/files/CD45RB_fileinfo.csv/content",
        "data_name": "CD45RB_Leukocyte.tar.gz",
        "metadata_name": "CD45RB_fileinfo.csv",
    },
    "lymphocytes": {
        "data": "https://zenodo.org/api/records/7412529/files/CD3CD20_Lymphocyte.tar.gz/content",
        "metadata": "https://zenodo.org/api/records/7412529/files/CD3CD20_fileinfo.csv/content",
        "data_name": "CD3CD20_Lymphocyte.tar.gz",
        "metadata_name": "CD3CD20_fileinfo.csv",
    },
    "endothelium": {
        "data": "https://zenodo.org/api/records/7412512/files/ERG_Endothelium.tar.gz/content",
        "metadata": "https://zenodo.org/api/records/7412512/files/ERG_fileinfo.csv/content",
        "data_name": "ERG_Endothelium.tar.gz",
        "metadata_name": "ERG_fileinfo.csv",
    },
    "plasma_cells": {
        "data": "https://zenodo.org/api/records/7412500/files/MIST1_PlasmaCell.tar.gz/content",
        "metadata": "https://zenodo.org/api/records/7412500/files/MIST1_fileinfo.csv/content",
        "data_name": "MIST1_PlasmaCell.tar.gz",
        "metadata_name": "MIST1_fileinfo.csv",
    },
    "myeloid_cells": {
        "data": "https://zenodo.org/api/records/7412690/files/MNDA_MyeloidCell.tar.gz/content",
        "metadata": "https://zenodo.org/api/records/7412690/files/MNDA_fileinfo.csv/content",
        "data_name": "MNDA_MyeloidCell.tar.gz",
        "metadata_name": "MNDA_fileinfo.csv",
    },
}


def _get_data_name(cell_type):
    """Return the extraction folder name, e.g. 'panCK_Epithelium' for 'panCK_Epithelium.tar.gz'."""
    return URLS[cell_type]["data_name"].split(".")[0]


def _to_cell_types(cell_types):
    """Normalize the user selection to a validated list of cell type names.

    ``None`` selects all cell types; a single string is wrapped in a list.
    Raises ValueError for any name not present in ``URLS``.
    """
    if cell_types is None:
        return list(URLS)
    if isinstance(cell_types, str):
        cell_types = [cell_types]
    invalid_cell_types = set(cell_types) - set(URLS)
    if invalid_cell_types:
        raise ValueError(
            f"Invalid cell type choices: {sorted(invalid_cell_types)}. Choose from {sorted(URLS)}."
        )
    return cell_types


def _is_gzip(path):
    """Check whether the file at 'path' starts with the two-byte gzip magic number."""
    with open(path, "rb") as f:
        return f.read(2) == b"\x1f\x8b"


def _save_as_h5(sample_path):
    """Convert one (image PNG, mask PNG) pair into a single chunked, gzip-compressed H5 file.

    'sample_path' is a (image_path, mask_path, h5_path) triple.
    """
    img_path, mask_path, h5_path = sample_path
    img = imageio.imread(img_path)
    mask = imageio.imread(mask_path)
    if img.ndim == 3 and img.shape[-1] == 4:  # drop the alpha channel of RGBA images
        img = img[:, :, :-1]
    img = img.transpose(2, 0, 1)  # (H, W, C) -> (C, H, W)
    _, h, w = img.shape
    # Chunk at most 256x256 spatially so small crops can be read without loading the full image.
    chunk_hw = (min(256, h), min(256, w))
    with h5py.File(h5_path, "w") as f:
        f.create_dataset(name="images/raw", data=img, compression="gzip", chunks=(1,) + chunk_hw)
        f.create_dataset(name="labels/mask", data=mask, compression="gzip", chunks=chunk_hw)


def _extract_data(tar_path, extract_path):
    """Extract the downloaded tarball, convert all image/mask pairs to H5 and clean up.

    The tarball is removed after extraction and the intermediate PNG folder is
    deleted once all H5 files have been written to '<extract_path>/data'.
    """
    extract_root = tar_path.parent.resolve() / "unprocessed"

    with tarfile.open(tar_path) as f:
        members = f.getmembers()
        # Validate every member against path traversal ("tar slip") BEFORE extracting
        # anything, then extract exactly once. (Extracting inside the validation loop
        # would both repeat the full extraction per member and write files before all
        # members were checked.)
        for member in tqdm(members, desc="Extracting data"):
            member_path = (extract_root / member.name).resolve()
            try:
                member_path.relative_to(extract_root)
            except ValueError:
                raise RuntimeError(f"Unsafe path in tar archive: {member.name}")
        f.extractall(extract_root)

    tar_path.unlink()  # the tarball is no longer needed; free the disk space

    h5_dir = extract_path / "data"
    h5_dir.mkdir(exist_ok=True, parents=True)
    # Pair each H&E image with its mask (same name, '_mask.png' instead of '_HE.png')
    # and the H5 output path it will be converted to.
    sample_paths = [
        (p, p.with_name(p.name.replace("_HE.png", "_mask.png")), h5_dir / p.with_suffix(".h5").name)
        for p in (extract_root / extract_path.name).glob("*_HE.png")
    ]

    # Convert in parallel; leave one core free for the main process.
    with Pool(max(1, cpu_count() - 1)) as p:
        list(tqdm(
            p.imap_unordered(_save_as_h5, sample_paths),
            total=len(sample_paths),
            desc="Saving to H5"
        ))

    rmtree(extract_root)


def get_segpath_data(
    path: Union[os.PathLike, str],
    cell_types: Optional[Union[str, List[str]]] = None,
    download: bool = False,
) -> None:
    """Download the SegPath data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        cell_types: The cell types to download. By default all cell types are downloaded.
        download: Whether to download the data if it is not present.
    """
    path = Path(path)
    path.mkdir(parents=True, exist_ok=True)
    if not download:
        return

    for cell_type in _to_cell_types(cell_types):
        source = URLS[cell_type]
        tar_path = path / source["data_name"]
        metadata_path = path / source["metadata_name"]
        extracted_path = path / _get_data_name(cell_type)

        util.download_source(metadata_path, source["metadata"], download, checksum=None)

        # Skip the (large) data download if the H5 folder already exists.
        if not (extracted_path / "data").exists():
            util.download_source(tar_path, source["data"], download, checksum=None)
            _extract_data(tar_path, extracted_path)


def _get_paths_from_metadata(path, cell_type, split):
    """Collect H5 paths for 'cell_type' filtered by 'split' via the metadata CSV.

    Only rows whose 'train_val_test' column matches 'split' (or all rows, if
    split is None) and whose corresponding H5 file exists are returned.
    """
    source = URLS[cell_type]
    metadata_path = path / source["metadata_name"]
    volume_paths = []
    volume_dir = path / _get_data_name(cell_type) / "data"

    # The CSV may be stored gzip-compressed; detect that from the file content.
    open_file = gzip.open if _is_gzip(metadata_path) else open
    with open_file(metadata_path, mode="rt") as f:
        reader = csv.DictReader(f)
        for row in reader:
            if split is not None and row["train_val_test"] != split:
                continue

            filename = row["filename"]
            # The metadata lists both images and masks; only image rows are relevant,
            # since image and mask live in the same H5 file.
            if not filename.endswith("_HE.png"):
                continue

            volume_path = volume_dir / Path(filename).name.replace(".png", ".h5")

            if not volume_path.exists():
                continue

            volume_paths.append(volume_path)

    return volume_paths


def _get_paths_from_files(path, cell_type, split):
    """Fallback: collect all H5 paths for 'cell_type' directly from the filesystem.

    Without the metadata CSV there is no split information, so requesting a
    split raises instead of silently returning everything.
    """
    if split is not None:
        raise RuntimeError(
            "The SegPath metadata CSV is required for split selection, but it could not be found. "
            "Please download the metadata with `download=True` or place it into the dataset folder."
        )

    data_name = _get_data_name(cell_type)

    return sorted((path / data_name / "data").glob("*.h5"))


def get_segpath_paths(
    path: Union[os.PathLike, str],
    cell_types: Optional[Union[str, List[str]]] = None,
    split: Optional[Literal["train", "val", "test"]] = None,
    download: bool = False,
) -> List[str]:
    """Get paths to the SegPath data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        cell_types: The cell types to use. By default all cell types are used.
        split: The split to use. Either "train", "val", "test" or None for all images.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the preprocessed H5 files.
    """
    path = Path(path)
    if split is not None and split not in ("train", "val", "test"):
        raise ValueError(f"'{split}' is not a valid split choice.")

    cell_types = _to_cell_types(cell_types)
    get_segpath_data(path, cell_types, download)

    volume_paths = []
    for cell_type in cell_types:
        # Prefer the metadata CSV (provides split information); otherwise fall
        # back to globbing the data folder.
        metadata_path = path / URLS[cell_type]["metadata_name"]
        if metadata_path.exists():
            this_volume_paths = _get_paths_from_metadata(path, cell_type, split)
        else:
            this_volume_paths = _get_paths_from_files(path, cell_type, split)

        volume_paths.extend(this_volume_paths)

    if not volume_paths:
        raise RuntimeError("Could not find any SegPath images and masks for the requested settings.")

    return sorted(str(p) for p in volume_paths)


def get_segpath_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    cell_types: Optional[Union[str, List[str]]] = None,
    split: Optional[Literal["train", "val", "test"]] = None,
    download: bool = False,
    label_dtype: torch.dtype = torch.int64,
    resize_inputs: bool = False,
    **kwargs
) -> Dataset:
    """Get the SegPath dataset for semantic segmentation in H&E stained histopathology images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        cell_types: The cell types to use. By default all cell types are used.
        split: The split to use. Either "train", "val", "test" or None for all images.
        download: Whether to download the data if it is not present.
        label_dtype: The datatype of labels.
        resize_inputs: Whether to resize the input images.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    volume_paths = get_segpath_paths(path, cell_types, split, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    # Image and mask live in the same H5 file under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=volume_paths,
        raw_key="images/raw",
        label_paths=volume_paths,
        label_key="labels/mask",
        patch_shape=patch_shape,
        label_dtype=label_dtype,
        is_seg_dataset=True,
        with_channels=True,
        ndim=2,
        **kwargs
    )


def get_segpath_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    cell_types: Optional[Union[str, List[str]]] = None,
    split: Optional[Literal["train", "val", "test"]] = None,
    download: bool = False,
    label_dtype: torch.dtype = torch.int64,
    resize_inputs: bool = False,
    **kwargs
) -> DataLoader:
    """Get the SegPath dataloader.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        cell_types: The cell types to use. By default all cell types are used.
        split: The split to use. Either "train", "val", "test" or None for all images.
        download: Whether to download the data if it is not present.
        label_dtype: The datatype of labels.
        resize_inputs: Whether to resize the input images.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Route dataset kwargs to the dataset constructor and the rest to the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_segpath_dataset(
        path=path, patch_shape=patch_shape, cell_types=cell_types, split=split, download=download,
        label_dtype=label_dtype, resize_inputs=resize_inputs, **ds_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URLS =
{'epithelium': {'data': 'https://zenodo.org/api/records/7412731/files/panCK_Epithelium.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412731/files/panCK_fileinfo.csv/content', 'data_name': 'panCK_Epithelium.tar.gz', 'metadata_name': 'panCK_fileinfo.csv'}, 'smooth_muscle': {'data': 'https://zenodo.org/api/records/7412732/files/aSMA_SmoothMuscle.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412732/files/aSMA_fileinfo.csv/content', 'data_name': 'aSMA_SmoothMuscle.tar.gz', 'metadata_name': 'aSMA_fileinfo.csv'}, 'red_blood_cells': {'data': 'https://zenodo.org/api/records/7412580/files/CD235a_RBC.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412580/files/CD235a_fileinfo.csv/content', 'data_name': 'CD235a_RBC.tar.gz', 'metadata_name': 'CD235a_fileinfo.csv'}, 'leukocytes': {'data': 'https://zenodo.org/api/records/7412739/files/CD45RB_Leukocyte.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412739/files/CD45RB_fileinfo.csv/content', 'data_name': 'CD45RB_Leukocyte.tar.gz', 'metadata_name': 'CD45RB_fileinfo.csv'}, 'lymphocytes': {'data': 'https://zenodo.org/api/records/7412529/files/CD3CD20_Lymphocyte.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412529/files/CD3CD20_fileinfo.csv/content', 'data_name': 'CD3CD20_Lymphocyte.tar.gz', 'metadata_name': 'CD3CD20_fileinfo.csv'}, 'endothelium': {'data': 'https://zenodo.org/api/records/7412512/files/ERG_Endothelium.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412512/files/ERG_fileinfo.csv/content', 'data_name': 'ERG_Endothelium.tar.gz', 'metadata_name': 'ERG_fileinfo.csv'}, 'plasma_cells': {'data': 'https://zenodo.org/api/records/7412500/files/MIST1_PlasmaCell.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412500/files/MIST1_fileinfo.csv/content', 'data_name': 'MIST1_PlasmaCell.tar.gz', 'metadata_name': 'MIST1_fileinfo.csv'}, 'myeloid_cells': {'data': 
'https://zenodo.org/api/records/7412690/files/MNDA_MyeloidCell.tar.gz/content', 'metadata': 'https://zenodo.org/api/records/7412690/files/MNDA_fileinfo.csv/content', 'data_name': 'MNDA_MyeloidCell.tar.gz', 'metadata_name': 'MNDA_fileinfo.csv'}}
def
get_segpath_data( path: Union[os.PathLike, str], cell_types: Union[List[str], str, NoneType] = None, download: bool = False) -> None:
149def get_segpath_data( 150 path: Union[os.PathLike, str], 151 cell_types: Optional[Union[str, List[str]]] = None, 152 download: bool = False, 153) -> None: 154 """Download the SegPath data. 155 156 Args: 157 path: Filepath to a folder where the downloaded data will be saved. 158 cell_types: The cell types to download. By default all cell types are downloaded. 159 download: Whether to download the data if it is not present. 160 """ 161 path = Path(path) 162 path.mkdir(parents=True, exist_ok=True) 163 if not download: 164 return 165 166 for cell_type in _to_cell_types(cell_types): 167 source = URLS[cell_type] 168 tar_path = path / source["data_name"] 169 metadata_path = path / source["metadata_name"] 170 extracted_path = path / _get_data_name(cell_type) 171 172 util.download_source(metadata_path, source["metadata"], download, checksum=None) 173 174 if not (extracted_path / "data").exists(): 175 util.download_source(tar_path, source["data"], download, checksum=None) 176 _extract_data(tar_path, extracted_path)
Download the SegPath data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- cell_types: The cell types to download. By default all cell types are downloaded.
- download: Whether to download the data if it is not present.
def
get_segpath_paths( path: Union[os.PathLike, str], cell_types: Union[List[str], str, NoneType] = None, split: Optional[Literal['train', 'val', 'test']] = None, download: bool = False) -> List[str]:
218def get_segpath_paths( 219 path: Union[os.PathLike, str], 220 cell_types: Optional[Union[str, List[str]]] = None, 221 split: Optional[Literal["train", "val", "test"]] = None, 222 download: bool = False, 223) -> List[str]: 224 """Get paths to the SegPath data. 225 226 Args: 227 path: Filepath to a folder where the downloaded data will be saved. 228 cell_types: The cell types to use. By default all cell types are used. 229 split: The split to use. Either "train", "val", "test" or None for all images. 230 download: Whether to download the data if it is not present. 231 232 Returns: 233 List of filepaths to the preprocessed H5 files. 234 """ 235 path = Path(path) 236 if split is not None and split not in ("train", "val", "test"): 237 raise ValueError(f"'{split}' is not a valid split choice.") 238 239 cell_types = _to_cell_types(cell_types) 240 get_segpath_data(path, cell_types, download) 241 242 volume_paths = [] 243 for cell_type in cell_types: 244 metadata_path = path / URLS[cell_type]["metadata_name"] 245 if metadata_path.exists(): 246 this_volume_paths = _get_paths_from_metadata(path, cell_type, split) 247 else: 248 this_volume_paths = _get_paths_from_files(path, cell_type, split) 249 250 volume_paths.extend(this_volume_paths) 251 252 if not volume_paths: 253 raise RuntimeError("Could not find any SegPath images and masks for the requested settings.") 254 255 return sorted(str(p) for p in volume_paths)
Get paths to the SegPath data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- cell_types: The cell types to use. By default all cell types are used.
- split: The split to use. Either "train", "val", "test" or None for all images.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths to the preprocessed H5 files.
def
get_segpath_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], cell_types: Union[List[str], str, NoneType] = None, split: Optional[Literal['train', 'val', 'test']] = None, download: bool = False, label_dtype: torch.dtype = torch.int64, resize_inputs: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
258def get_segpath_dataset( 259 path: Union[os.PathLike, str], 260 patch_shape: Tuple[int, int], 261 cell_types: Optional[Union[str, List[str]]] = None, 262 split: Optional[Literal["train", "val", "test"]] = None, 263 download: bool = False, 264 label_dtype: torch.dtype = torch.int64, 265 resize_inputs: bool = False, 266 **kwargs 267) -> Dataset: 268 """Get the SegPath dataset for semantic segmentation in H&E stained histopathology images. 269 270 Args: 271 path: Filepath to a folder where the downloaded data will be saved. 272 patch_shape: The patch shape to use for training. 273 cell_types: The cell types to use. By default all cell types are used. 274 split: The split to use. Either "train", "val", "test" or None for all images. 275 download: Whether to download the data if it is not present. 276 label_dtype: The datatype of labels. 277 resize_inputs: Whether to resize the input images. 278 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 279 280 Returns: 281 The segmentation dataset. 282 """ 283 volume_paths = get_segpath_paths(path, cell_types, split, download) 284 285 if resize_inputs: 286 resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True} 287 kwargs, patch_shape = util.update_kwargs_for_resize_trafo( 288 kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs 289 ) 290 291 return torch_em.default_segmentation_dataset( 292 raw_paths=volume_paths, 293 raw_key="images/raw", 294 label_paths=volume_paths, 295 label_key="labels/mask", 296 patch_shape=patch_shape, 297 label_dtype=label_dtype, 298 is_seg_dataset=True, 299 with_channels=True, 300 ndim=2, 301 **kwargs 302 )
Get the SegPath dataset for semantic segmentation in H&E stained histopathology images.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- cell_types: The cell types to use. By default all cell types are used.
- split: The split to use. Either "train", "val", "test" or None for all images.
- download: Whether to download the data if it is not present.
- label_dtype: The datatype of labels.
- resize_inputs: Whether to resize the input images.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_segpath_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, cell_types: Union[List[str], str, NoneType] = None, split: Optional[Literal['train', 'val', 'test']] = None, download: bool = False, label_dtype: torch.dtype = torch.int64, resize_inputs: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
305def get_segpath_loader( 306 path: Union[os.PathLike, str], 307 patch_shape: Tuple[int, int], 308 batch_size: int, 309 cell_types: Optional[Union[str, List[str]]] = None, 310 split: Optional[Literal["train", "val", "test"]] = None, 311 download: bool = False, 312 label_dtype: torch.dtype = torch.int64, 313 resize_inputs: bool = False, 314 **kwargs 315) -> DataLoader: 316 """Get the SegPath dataloader. 317 318 Args: 319 path: Filepath to a folder where the downloaded data will be saved. 320 patch_shape: The patch shape to use for training. 321 batch_size: The batch size for training. 322 cell_types: The cell types to use. By default all cell types are used. 323 split: The split to use. Either "train", "val", "test" or None for all images. 324 download: Whether to download the data if it is not present. 325 label_dtype: The datatype of labels. 326 resize_inputs: Whether to resize the input images. 327 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 328 329 Returns: 330 The DataLoader. 331 """ 332 ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) 333 dataset = get_segpath_dataset( 334 path=path, patch_shape=patch_shape, cell_types=cell_types, split=split, download=download, 335 label_dtype=label_dtype, resize_inputs=resize_inputs, **ds_kwargs 336 ) 337 return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
Get the SegPath dataloader.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- cell_types: The cell types to use. By default all cell types are used.
- split: The split to use. Either "train", "val", "test" or None for all images.
- download: Whether to download the data if it is not present.
- label_dtype: The datatype of labels.
- resize_inputs: Whether to resize the input images.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.