torch_em.data.datasets.medical.dsad

The DSAD dataset contains annotations for abdominal organs in laparoscopy images.

This dataset is located at https://springernature.figshare.com/articles/dataset/The_Dresden_Surgical_Anatomy_Dataset_for_abdominal_organ_segmentation_in_surgical_data_science/21702600. The dataset is from the publication https://doi.org/10.1038/s41597-022-01719-2. Please cite it if you use this dataset for your research.

  1"""The DSAD dataset contains annotations for abdominal organs in laparoscopy images.
  2
  3This dataset is located at https://springernature.figshare.com/articles/dataset/The_Dresden_Surgical_Anatomy_Dataset_for_abdominal_organ_segmentation_in_surgical_data_science/21702600  # noqa
  4The dataset is from the publication https://doi.org/10.1038/s41597-022-01719-2.
  5Please cite it if you use this dataset for your research.
  6"""
  7
  8import os
  9from glob import glob
 10from natsort import natsorted
 11from typing import Union, Tuple, List, Optional
 12
 13from torch.utils.data import Dataset, DataLoader
 14
 15import torch_em
 16
 17from .. import util
 18
 19
 20URL = "https://springernature.figshare.com/ndownloader/files/38494425"
 21CHECKSUM = "b8a8ade37d106fc1641a901d1c843806f2d27f9f8e18f4614b043e7e2ca2e40f"
 22ORGANS = [
 23    "abdominal_wall", "inferior_mesenteric_artery", "liver", "pancreas", "spleen", "ureter",
 24    "colon", "intestinal_veins", "multilabel", "small_intestine", "stomach", "vesicular_glands"
 25]
 26
 27
 28def get_dsad_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 29    """Download the DSAD dataset.
 30
 31    Args:
 32        path: Filepath to a folder where the data is downloaded for further processing.
 33        download: Whether to download the data if it is not present.
 34
 35    Returns:
 36        Filepath where the data is downloaded.
 37    """
 38    data_dir = os.path.join(path, "data")
 39    if os.path.exists(data_dir):
 40        return data_dir
 41
 42    os.makedirs(path, exist_ok=True)
 43
 44    zip_path = os.path.join(path, "data.zip")
 45    print("Downloading the DSAD data. Might take several minutes depending on your internet connection.")
 46    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 47    util.unzip(zip_path=zip_path, dst=os.path.join(path, "data"), remove=False)
 48    print("The download has finished and the data has been unzipped to a target folder.")
 49
 50    return data_dir
 51
 52
 53def get_dsad_paths(
 54    path: Union[os.PathLike, str], organ: Optional[str] = None, download: bool = False
 55) -> Tuple[List[str], List[str]]:
 56    """Get paths to the DSAD data.
 57
 58    Args:
 59        path: Filepath to a folder where the data is downloaded for further processing.
 60        organ: The choice of organ annotations.
 61        download: Whether to download the data if it is not present.
 62
 63    Returns:
 64        List of filepaths for the image data.
 65        List of filepaths for the label data.
 66    """
 67    data_dir = get_dsad_data(path, download)
 68
 69    if organ is None:
 70        organ = "*"
 71    else:
 72        assert organ in ORGANS, f"'{organ}' is not a valid organ choice."
 73        assert isinstance(organ, str), "We currently support choosing one organ at a time."
 74
 75    image_paths = natsorted(glob(os.path.join(data_dir, organ, "*", "image*.png")))
 76    # Remove multi-label inputs.
 77    image_paths = [p for p in image_paths if "multilabel" not in p]
 78
 79    # Get label paths.
 80    mask_paths = [p.replace("image", "mask") for p in image_paths]
 81    assert all([os.path.exists(p) for p in mask_paths])
 82
 83    assert image_paths and len(image_paths) == len(mask_paths)
 84
 85    return image_paths, mask_paths
 86
 87
 88def get_dsad_dataset(
 89    path: Union[os.PathLike, str],
 90    patch_shape: Tuple[int, int],
 91    organ: Optional[str] = None,
 92    resize_inputs: bool = False,
 93    download: bool = False,
 94    **kwargs
 95) -> Dataset:
 96    """Get the DSAD dataset for organ segmentation.
 97
 98    Args:
 99        path: Filepath to a folder where the data is downloaded for further processing.
100        patch_shape: The patch shape to use for training.
101        organ: The choice of organ annotations.
102        resize_inputs: Whether to resize the inputs to the expected patch shape.
103        download: Whether to download the data if it is not present.
104        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
105
106    Returns:
107        The segmentation dataset.
108    """
109    image_paths, mask_paths = get_dsad_paths(path, organ, download)
110
111    if resize_inputs:
112        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
113        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
114            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
115        )
116
117    return torch_em.default_segmentation_dataset(
118        raw_paths=image_paths,
119        raw_key=None,
120        label_paths=mask_paths,
121        label_key=None,
122        patch_shape=patch_shape,
123        with_channels=True,
124        is_seg_dataset=False,
125        **kwargs
126    )
127
128
def get_dsad_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    organ: Optional[str] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the DSAD dataloader for organ segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        organ: The choice of organ annotations.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The Dataloader.
    """
    # Separate the arguments for the dataset from the remaining ones, which go to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_dsad_dataset(
        path=path,
        patch_shape=patch_shape,
        organ=organ,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset=ds, batch_size=batch_size, **dataloader_kwargs)
# Download link (figshare) of the zipped DSAD data.
URL = "https://springernature.figshare.com/ndownloader/files/38494425"

# Expected checksum of the downloaded archive; it is handed to `util.download_source`.
CHECKSUM = "b8a8ade37d106fc1641a901d1c843806f2d27f9f8e18f4614b043e7e2ca2e40f"

# Names that are accepted for the `organ` argument of `get_dsad_paths`.
# NOTE: "multilabel" is listed here, but `get_dsad_paths` filters out multi-label inputs.
ORGANS = [
    "abdominal_wall",
    "inferior_mesenteric_artery",
    "liver",
    "pancreas",
    "spleen",
    "ureter",
    "colon",
    "intestinal_veins",
    "multilabel",
    "small_intestine",
    "stomach",
    "vesicular_glands",
]
def get_dsad_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_dsad_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the DSAD data is available locally and return its location.

    The archive is stored as `data.zip` inside `path` and extracted to `<path>/data`.
    If `<path>/data` already exists, nothing is downloaded or extracted.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath to the folder that contains the extracted data.
    """
    target_dir = os.path.join(path, "data")

    if not os.path.exists(target_dir):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "data.zip")

        print("Downloading the DSAD data. Might take several minutes depending on your internet connection.")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        # The archive is kept on disk after the extraction (remove=False).
        util.unzip(zip_path=archive, dst=target_dir, remove=False)
        print("The download has finished and the data has been unzipped to a target folder.")

    return target_dir

Download the DSAD dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def get_dsad_paths( path: Union[os.PathLike, str], organ: Optional[str] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_dsad_paths(
    path: Union[os.PathLike, str], organ: Optional[str] = None, download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the DSAD data.

    Images are collected from `<data_dir>/<organ>/*/image*.png`, where `data_dir` is the folder
    returned by `get_dsad_data`. The mask path of an image is derived by replacing "image" with "mask"
    in the part of the path below `data_dir`. Multi-label inputs are not returned.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        organ: The choice of organ annotations. Must be one of `ORGANS`.
            By default, the images for all organs are used.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If `organ` is not valid, if no images are found or if a mask is missing.
    """
    data_dir = get_dsad_data(path, download)

    if organ is None:
        organ = "*"
    else:
        # Check the type first, so that e.g. a list of organs fails with the matching message.
        assert isinstance(organ, str), "We currently support choosing one organ at a time."
        assert organ in ORGANS, f"'{organ}' is not a valid organ choice."

    image_paths = natsorted(glob(os.path.join(data_dir, organ, "*", "image*.png")))

    # Remove multi-label inputs. Only the part of the path below 'data_dir' is checked, so that
    # a download folder with "multilabel" in its name does not discard all images.
    image_paths = [p for p in image_paths if "multilabel" not in os.path.relpath(p, data_dir)]
    assert image_paths, f"Could not find any images for the organ choice '{organ}' in '{data_dir}'."

    # Get label paths. The replacement is restricted to the part of the path below 'data_dir':
    # replacing in the full path would also rewrite a download folder with "image" in its name.
    mask_paths = [
        os.path.join(data_dir, os.path.relpath(p, data_dir).replace("image", "mask")) for p in image_paths
    ]
    missing = [p for p in mask_paths if not os.path.exists(p)]
    assert not missing, f"Could not find the masks for {len(missing)} image(s), e.g. '{missing[0]}'."

    return image_paths, mask_paths

Get paths to the DSAD data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • organ: The choice of organ annotations.
  • download: Whether to download the data if it is not present.
Returns:

A list of filepaths for the image data, followed by a list of filepaths for the label data.

def get_dsad_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], organ: Optional[str] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 89def get_dsad_dataset(
 90    path: Union[os.PathLike, str],
 91    patch_shape: Tuple[int, int],
 92    organ: Optional[str] = None,
 93    resize_inputs: bool = False,
 94    download: bool = False,
 95    **kwargs
 96) -> Dataset:
 97    """Get the DSAD dataset for organ segmentation.
 98
 99    Args:
100        path: Filepath to a folder where the data is downloaded for further processing.
101        patch_shape: The patch shape to use for training.
102        organ: The choice of organ annotations.
103        resize_inputs: Whether to resize the inputs to the expected patch shape.
104        download: Whether to download the data if it is not present.
105        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
106
107    Returns:
108        The segmentation dataset.
109    """
110    image_paths, mask_paths = get_dsad_paths(path, organ, download)
111
112    if resize_inputs:
113        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
114        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
115            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
116        )
117
118    return torch_em.default_segmentation_dataset(
119        raw_paths=image_paths,
120        raw_key=None,
121        label_paths=mask_paths,
122        label_key=None,
123        patch_shape=patch_shape,
124        with_channels=True,
125        is_seg_dataset=False,
126        **kwargs
127    )

Get the DSAD dataset for organ segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • organ: The choice of organ annotations.
  • resize_inputs: Whether to resize the inputs to the expected patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_dsad_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], organ: Optional[str] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_dsad_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    organ: Optional[str] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the DSAD dataloader for organ segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        organ: The choice of organ annotations.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The Dataloader.
    """
    # Separate the arguments for the dataset from the remaining ones, which go to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_dsad_dataset(
        path=path,
        patch_shape=patch_shape,
        organ=organ,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset=ds, batch_size=batch_size, **dataloader_kwargs)

Get the DSAD dataloader for organ segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • organ: The choice of organ annotations.
  • resize_inputs: Whether to resize the inputs to the expected patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The Dataloader.