torch_em.data.datasets.histopathology.monusac

This dataset consists of annotations for nucleus segmentation in H&E stained tissue images derived from four different organs.

This dataset comes from https://monusac-2020.grand-challenge.org/Data/.

This dataset is from the publication https://doi.org/10.1109/TMI.2021.3085712. Please cite it if you use this dataset in your research.
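
A minimal usage sketch, assuming a hypothetical local directory "./data/monusac" and 2D patches of shape (512, 512); see get_monusac_loader below for all options:

    from torch_em.data.datasets.histopathology.monusac import get_monusac_loader

    # Download (if needed) and create a DataLoader for the training split.
    # The path and the patch shape are example choices, not fixed by the dataset.
    loader = get_monusac_loader(
        path="./data/monusac",
        patch_shape=(512, 512),
        batch_size=2,
        split="train",
        organ_type=["lung", "kidney"],  # optional filter; None (default) uses all organs
        download=True,
    )

    for images, labels in loader:  # batches of raw images and label targets
        print(images.shape, labels.shape)
        break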

  1"""This dataset consists annotations for nucleus segmentation in
  2H&E stained tissue images derived from four different organs.
  3
  4This dataset comes from https://monusac-2020.grand-challenge.org/Data/.
  5
  6This dataset is from the publication https://doi.org/10.1109/TMI.2021.3085712.
  7Please cite it if you use this dataset in your research.
  8"""
  9
 10import os
 11import shutil
 12from glob import glob
 13from tqdm import tqdm
 14from pathlib import Path
 15from typing import Optional, List, Union, Literal, Tuple
 16
 17import imageio.v3 as imageio
 18
 19from torch.utils.data import Dataset, DataLoader
 20
 21import torch_em
 22
 23from .. import util
 24
 25
 26URL = {
 27    "train": "https://drive.google.com/uc?export=download&id=1lxMZaAPSpEHLSxGA9KKMt_r-4S8dwLhq",
 28    "test": "https://drive.google.com/uc?export=download&id=1G54vsOdxWY1hG7dzmkeK3r0xz9s-heyQ"
 29}
 30
 31
 32CHECKSUM = {
 33    "train": "5b7cbeb34817a8f880d3fddc28391e48d3329a91bf3adcbd131ea149a725cd92",
 34    "test": "bcbc38f6bf8b149230c90c29f3428cc7b2b76f8acd7766ce9fc908fc896c2674"
 35}
 36
 37# here's the description: https://drive.google.com/file/d/1kdOl3s6uQBRv0nToSIf1dPuceZunzL4N/view
 38ORGAN_SPLITS = {
 39    "train": {
 40        "lung": [
 41            "TCGA-55-1594", "TCGA-69-7760", "TCGA-69-A59K", "TCGA-73-4668", "TCGA-78-7220",
 42            "TCGA-86-7713", "TCGA-86-8672", "TCGA-L4-A4E5", "TCGA-MP-A4SY", "TCGA-MP-A4T7"
 43        ],
 44        "kidney": [
 45            "TCGA-5P-A9K0", "TCGA-B9-A44B", "TCGA-B9-A8YI", "TCGA-DW-7841", "TCGA-EV-5903", "TCGA-F9-A97G",
 46            "TCGA-G7-A8LD", "TCGA-MH-A560", "TCGA-P4-AAVK", "TCGA-SX-A7SR", "TCGA-UZ-A9PO", "TCGA-UZ-A9PU"
 47        ],
 48        "breast": [
 49            "TCGA-A2-A0CV", "TCGA-A2-A0ES", "TCGA-B6-A0WZ", "TCGA-BH-A18T", "TCGA-D8-A1X5",
 50            "TCGA-E2-A154", "TCGA-E9-A22B", "TCGA-E9-A22G", "TCGA-EW-A6SD", "TCGA-S3-AA11"
 51        ],
 52        "prostate": [
 53            "TCGA-EJ-5495", "TCGA-EJ-5505", "TCGA-EJ-5517", "TCGA-G9-6342", "TCGA-G9-6499",
 54            "TCGA-J4-A67Q", "TCGA-J4-A67T", "TCGA-KK-A59X", "TCGA-KK-A6E0", "TCGA-KK-A7AW",
 55            "TCGA-V1-A8WL", "TCGA-V1-A9O9", "TCGA-X4-A8KQ", "TCGA-YL-A9WY"
 56        ]
 57    },
 58    "test": {
 59        "lung": [
 60            "TCGA-49-6743", "TCGA-50-6591", "TCGA-55-7570", "TCGA-55-7573",
 61            "TCGA-73-4662", "TCGA-78-7152", "TCGA-MP-A4T7"
 62        ],
 63        "kidney": [
 64            "TCGA-2Z-A9JG", "TCGA-2Z-A9JN", "TCGA-DW-7838", "TCGA-DW-7963",
 65            "TCGA-F9-A8NY", "TCGA-IZ-A6M9", "TCGA-MH-A55W"
 66        ],
 67        "breast": ["TCGA-A2-A04X", "TCGA-A2-A0ES", "TCGA-D8-A3Z6", "TCGA-E2-A108", "TCGA-EW-A6SB"],
 68        "prostate": ["TCGA-G9-6356", "TCGA-G9-6367", "TCGA-VP-A87E", "TCGA-VP-A87H", "TCGA-X4-A8KS", "TCGA-YL-A9WL"]
 69    },
 70}
 71
 72
 73def _check_channel_consistency(path, split):
 74    "The provided tif images have RGBA channels; check for and remove the alpha channel."
 75    all_image_path = glob(os.path.join(path, "images", split, "*.tif"))
 76    for image_path in all_image_path:
 77        image = imageio.imread(image_path)
 78        if image.ndim == 3 and image.shape[-1] == 4:  # NOTE: There are images without an alpha channel.
 79            rgb_image = image[..., :-1]  # get rid of the alpha channel
 80            imageio.imwrite(image_path, rgb_image)
 81
 82
 83def _process_monusac(path, split):
 84    util.unzip(os.path.join(path, f"monusac_{split}.zip"), path)
 85
 86    # sorting the images into the expected directory;
 87    # converting the label xml files to numpy arrays (with the same spatial dimensions as the input images) in the expected directory
 88    root_img_save_dir = os.path.join(path, "images", split)
 89    root_label_save_dir = os.path.join(path, "labels", split)
 90
 91    os.makedirs(root_img_save_dir, exist_ok=True)
 92    os.makedirs(root_label_save_dir, exist_ok=True)
 93
 94    all_patient_dir = sorted(glob(os.path.join(path, "MoNuSAC*", "*")))
 95
 96    for patient_dir in tqdm(all_patient_dir, desc=f"Converting {split} inputs for all patients"):
 97        all_img_dir = sorted(glob(os.path.join(patient_dir, "*.tif")))
 98        all_xml_label_dir = sorted(glob(os.path.join(patient_dir, "*.xml")))
 99
100        if len(all_img_dir) != len(all_xml_label_dir):
101            _convert_missing_tif_from_svs(patient_dir)
102            all_img_dir = sorted(glob(os.path.join(patient_dir, "*.tif")))
103
104        assert len(all_img_dir) == len(all_xml_label_dir)
105
106        for img_path, xml_label_path in zip(all_img_dir, all_xml_label_dir):
107            desired_label_shape = imageio.imread(img_path).shape[:-1]
108
109            img_id = os.path.split(img_path)[-1]
110            dst = os.path.join(root_img_save_dir, img_id)
111            shutil.move(src=img_path, dst=dst)
112
113            _label = util.generate_labeled_array_from_xml(shape=desired_label_shape, xml_file=xml_label_path)
114            _fileid = img_id.split(".")[0]
115            imageio.imwrite(os.path.join(root_label_save_dir, f"{_fileid}.tif"), _label)
116
117    shutil.rmtree(glob(os.path.join(path, "MoNuSAC*"))[0])
118
119
120def _convert_missing_tif_from_svs(patient_dir):
121    """Converts svs scans to tif for inputs where the tif file is missing.
122
123    Cause: This only happens in the test split; some images were presumably missed when the data was converted.
124    Fix: We have the original svs scans, so we convert them to tif.
125    """
126    all_svs_dir = sorted(glob(os.path.join(patient_dir, "*.svs")))
127    for svs_path in all_svs_dir:
128        save_tif_path = os.path.splitext(svs_path)[0] + ".tif"
129        if not os.path.exists(save_tif_path):
130            img_array = util.convert_svs_to_array(svs_path)
131            # the arrays from the svs scans are supposed to be RGB images
132            assert img_array.shape[-1] == 3
133            imageio.imwrite(save_tif_path, img_array)
134
135
136def get_patient_id(path, split_wrt="-01Z-00-"):
137    """Returns the patient id in the expected format.
138    Input Names: "TCGA-<XX>-<XXXX>-01Z-00-DX<X>-(<X>, <00X>).tif" (example: TCGA-2Z-A9JG-01Z-00-DX1_1.tif)
139    Expected: "TCGA-<XX>-<XXXX>"                                  (example: TCGA-2Z-A9JG)
140    """
141    patient_image_id = Path(path).stem
142    patient_id = patient_image_id.split(split_wrt)[0]
143    return patient_id
144
145
146def get_monusac_data(path: Union[os.PathLike, str], split: Literal['train', 'test'], download: bool = False):
147    """Download the MoNuSAC dataset.
148
149    Args:
150        path: Filepath to a folder where the downloaded data will be saved.
151        split: The split to use for the dataset. Either 'train' or 'test'.
152        download: Whether to download the data if it is not present.
153    """
154    assert split in ["train", "test"], "Please choose from train/test"
155
156    # check if we have extracted the images and labels already
157    im_path = os.path.join(path, "images", split)
158    label_path = os.path.join(path, "labels", split)
159    if os.path.exists(im_path) and os.path.exists(label_path):
160        return
161
162    os.makedirs(path, exist_ok=True)
163    zip_path = os.path.join(path, f"monusac_{split}.zip")
164    util.download_source_gdrive(zip_path, URL[split], download=download, checksum=CHECKSUM[split])
165
166    _process_monusac(path, split)
167
168    _check_channel_consistency(path, split)
169
170
171def get_monusac_paths(
172    path: Union[os.PathLike, str],
173    split: Literal['train', 'test'],
174    organ_type: Optional[List[str]] = None,
175    download: bool = False
176) -> Tuple[List[str], List[str]]:
177    """Get paths to MoNuSAC data.
178
179    Args:
180        path: Filepath to a folder where the downloaded data will be saved.
181        split: The split to use for the dataset. Either 'train' or 'test'.
182        organ_type: The choice of organ type(s). One or more of 'lung', 'kidney', 'breast', 'prostate'; by default, all organs are used.
183        download: Whether to download the data if it is not present.
184
185    Returns:
186        List of filepaths to the image data.
187        List of filepaths to the label data.
188    """
189    get_monusac_data(path, split, download)
190
191    image_paths = sorted(glob(os.path.join(path, "images", split, "*")))
192    label_paths = sorted(glob(os.path.join(path, "labels", split, "*")))
193
194    if organ_type is not None:
195        # get all patients for multiple organ selection
196        all_organ_splits = sum([ORGAN_SPLITS[split][o] for o in organ_type], [])
197
198        image_paths = [_path for _path in image_paths if get_patient_id(_path) in all_organ_splits]
199        label_paths = [_path for _path in label_paths if get_patient_id(_path) in all_organ_splits]
200
201    assert len(image_paths) == len(label_paths)
202
203    return image_paths, label_paths
204
205
206def get_monusac_dataset(
207    path: Union[os.PathLike, str],
208    patch_shape: Tuple[int, ...],
209    split: Literal['train', 'test'],
210    organ_type: Optional[List[str]] = None,
211    download: bool = False,
212    offsets: Optional[List[List[int]]] = None,
213    boundaries: bool = False,
214    binary: bool = False,
215    resize_inputs: bool = False,
216    **kwargs
217) -> Dataset:
218    """Get the MoNuSAC dataset for nucleus segmentation in H&E stained tissue images.
219
220    Args:
221        path: Filepath to a folder where the downloaded data will be saved.
222        patch_shape: The patch shape to use for training.
223        split: The split to use for the dataset. Either 'train' or 'test'.
224        organ_type: The choice of organ type(s). One or more of 'lung', 'kidney', 'breast', 'prostate'; by default, all organs are used.
225        download: Whether to download the data if it is not present.
226        offsets: Offset values for affinity computation used as target.
227        boundaries: Whether to compute boundaries as the target.
228        binary: Whether to use a binary segmentation target.
229        resize_inputs: Whether to resize the inputs.
230        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
231
232    Returns:
233        The segmentation dataset.
234    """
235    image_paths, label_paths = get_monusac_paths(path, split, organ_type, download)
236
237    kwargs, _ = util.add_instance_label_transform(
238        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
239    )
240
241    if resize_inputs:
242        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
243        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
244            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
245        )
246
247    return torch_em.default_segmentation_dataset(
248        raw_paths=image_paths,
249        raw_key=None,
250        label_paths=label_paths,
251        label_key=None,
252        patch_shape=patch_shape,
253        is_seg_dataset=False,
254        **kwargs
255    )
256
257
258def get_monusac_loader(
259    path: Union[os.PathLike, str],
260    patch_shape: Tuple[int, ...],
261    batch_size: int,
262    split: Literal['train', 'test'],
263    organ_type: Optional[List[str]] = None,
264    download: bool = False,
265    offsets: Optional[List[List[int]]] = None,
266    boundaries: bool = False,
267    binary: bool = False,
268    resize_inputs: bool = False,
269    **kwargs
270) -> DataLoader:
271    """Get the MoNuSAC dataloader for nucleus segmentation in H&E stained tissue images.
272
273    Args:
274        path: Filepath to a folder where the downloaded data will be saved.
275        patch_shape: The patch shape to use for training.
276        batch_size: The batch size for training.
277        split: The split to use for the dataset. Either 'train' or 'test'.
278        organ_type: The choice of organ type.
279        download: Whether to download the data if it is not present.
280        offsets: Offset values for affinity computation used as target.
281        boundaries: Whether to compute boundaries as the target.
282        binary: Whether to use a binary segmentation target.
283        resize_inputs: Whether to resize the inputs.
284        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
285
286    Returns:
287        The DataLoader
288    """
289    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
290    dataset = get_monusac_dataset(
291        path, patch_shape, split, organ_type, download, offsets, boundaries, binary, resize_inputs, **ds_kwargs
292    )
293    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def get_patient_id(path, split_wrt='-01Z-00-'):

Returns the patient id in the expected format.

Input Names: "TCGA-<XX>-<XXXX>-01Z-00-DX<X>-(<X>, <00X>).tif" (example: TCGA-2Z-A9JG-01Z-00-DX1_1.tif)
Expected: "TCGA-<XX>-<XXXX>" (example: TCGA-2Z-A9JG)
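
For example:

    from torch_em.data.datasets.histopathology.monusac import get_patient_id

    print(get_patient_id("TCGA-2Z-A9JG-01Z-00-DX1_1.tif"))  # prints: TCGA-2Z-A9JG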

def get_monusac_data( path: Union[os.PathLike, str], split: Literal['train', 'test'], download: bool = False):

Download the MoNuSAC dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The split to use for the dataset. Either 'train' or 'test'.
  • download: Whether to download the data if it is not present.
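
A short sketch of a typical call, with "./data/monusac" as a hypothetical target folder:

    from torch_em.data.datasets.histopathology.monusac import get_monusac_data

    # Downloads and extracts the test split; afterwards images and labels are found
    # under "./data/monusac/images/test" and "./data/monusac/labels/test".
    get_monusac_data(path="./data/monusac", split="test", download=True)
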
def get_monusac_paths( path: Union[os.PathLike, str], split: Literal['train', 'test'], organ_type: Optional[List[str]] = None, download: bool = False) -> Tuple[List[str], List[str]]:

Get paths to MoNuSAC data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The split to use for the dataset. Either 'train' or 'test'.
  • organ_type: The choice of organ type(s). One or more of 'lung', 'kidney', 'breast', 'prostate'; by default, all organs are used.
  • download: Whether to download the data if it is not present.
Returns:
  • List of filepaths to the image data.
  • List of filepaths to the label data.
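
For example, restricted to two organs (the path is a hypothetical example):

    from torch_em.data.datasets.histopathology.monusac import get_monusac_paths

    image_paths, label_paths = get_monusac_paths(
        path="./data/monusac", split="train", organ_type=["breast", "prostate"], download=True
    )
    assert len(image_paths) == len(label_paths)
    print(image_paths[:2])  # tif files filtered by the patient ids listed in ORGAN_SPLITS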

def get_monusac_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], split: Literal['train', 'test'], organ_type: Optional[List[str]] = None, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, resize_inputs: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:

Get the MoNuSAC dataset for nucleus segmentation in H&E stained tissue images.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • split: The split to use for the dataset. Either 'train' or 'test'.
  • organ_type: The choice of organ type(s). One or more of 'lung', 'kidney', 'breast', 'prostate'; by default, all organs are used.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • resize_inputs: Whether to resize the inputs.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.
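
A minimal sketch, assuming a hypothetical data directory and 2D patches of shape (512, 512):

    from torch_em.data.datasets.histopathology.monusac import get_monusac_dataset

    dataset = get_monusac_dataset(
        path="./data/monusac",
        patch_shape=(512, 512),
        split="train",
        boundaries=True,  # request boundary targets in addition to the binary target
        download=True,
    )
    print(len(dataset))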

def get_monusac_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], batch_size: int, split: Literal['train', 'test'], organ_type: Optional[List[str]] = None, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, resize_inputs: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:

Get the MoNuSAC dataloader for nucleus segmentation in H&E stained tissue images.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • split: The split to use for the dataset. Either 'train' or 'test'.
  • organ_type: The choice of organ type(s). One or more of 'lung', 'kidney', 'breast', 'prostate'; by default, all organs are used.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • resize_inputs: Whether to resize the inputs.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.
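
A short sketch; dataset arguments and standard PyTorch DataLoader arguments (e.g. num_workers, shuffle) can be passed together and are separated internally via util.split_kwargs. The path is a hypothetical example:

    from torch_em.data.datasets.histopathology.monusac import get_monusac_loader

    loader = get_monusac_loader(
        path="./data/monusac",
        patch_shape=(512, 512),
        batch_size=4,
        split="test",
        binary=True,       # binary foreground/background target
        download=True,
        num_workers=2,     # forwarded to the PyTorch DataLoader
        shuffle=True,
    )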