torch_em.data.datasets.medical.panorama
The PANORAMA dataset contains annotations for PDAC lesions, veins, arteries, pancreas parenchyma, pancreatic duct and common bile duct segmentation in CT scans.
The dataset is from the PANORAMA challenge: https://panorama.grand-challenge.org/.
NOTE: The latest information on the label legend is located at: https://github.com/DIAGNijmegen/panorama_labels#label-legend. The label legend is as follows:
- background: 0
- PDAC lesion: 1
- veins: 2
- arteries: 3
- pancreas parenchyma: 4
- pancreatic duct: 5
- common bile duct: 6
This dataset is from the article: https://doi.org/10.5281/zenodo.10599559. Please cite it if you use this dataset in your research.
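For convenience, the label legend above can be written down as a plain Python mapping. This is an illustrative sketch; the name PANORAMA_LABELS is not an identifier exported by torch_em.

# Label IDs as used in the .nii.gz label volumes.
# NOTE: illustrative name only, not part of torch_em.
PANORAMA_LABELS = {
    0: "background",
    1: "PDAC lesion",
    2: "veins",
    3: "arteries",
    4: "pancreas parenchyma",
    5: "pancreatic duct",
    6: "common bile duct",
}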
1"""The PANORAMA dataset contains annotation for PDAC lesion, veins, arteries, pancreas parenchyma, 2pancreatic duct and common bile duct segmentation in CT scans. 3 4The dataset is from the PANORAMA challenge: https://panorama.grand-challenge.org/. 5 6NOTE: The latest information for the label legends are located at: 7https://github.com/DIAGNijmegen/panorama_labels#label-legend. 8The label legends are described as follows: 9- background: 0 10- PDAC lesion: 1 11- veins: 2 12- arteries: 3 13- pancreas parenchyma: 4 14- pancreatic duct: 5 15- common bile duct: 6 16 17This dataset is from the article: https://doi.org/10.5281/zenodo.10599559 18Please cite it if you use this dataset in your research. 19""" 20 21import os 22import shutil 23import subprocess 24from glob import glob 25from natsort import natsorted 26from typing import Union, Tuple, Optional, Literal, List 27 28from torch.utils.data import Dataset, DataLoader 29 30import torch_em 31 32from .. import util 33 34 35URLS = { 36 "batch_1": "https://zenodo.org/records/13715870/files/batch_1.zip", 37 "batch_2": "https://zenodo.org/records/13742336/files/batch_2.zip", 38 "batch_3": "https://zenodo.org/records/11034011/files/batch_3.zip", 39 "batch_4": "https://zenodo.org/records/10999754/files/batch_4.zip", 40} 41 42CHECKSUMS = { 43 "batch_1": "aff39b6347650d6c7457adf7a04bfb0a651ab6ecd33676ff109bdab17bc41cff", 44 "batch_2": "db6353a2c1c565c8bf084bd4fe1512fd6020b7675a1c9ab61b9a13d72a9fe76c", 45 "batch_3": "c1d71b40948edc36f795a7801cc79000082df8d365c48574af50b36516d64cee", 46 "batch_4": "3b5341af79c2cc8b8a9fa3ab7a6cfa8fedf694538a3d6be97c18e5c82be4d9d8", 47} 48 49 50def get_panorama_data(path: Union[os.PathLike, str], download: bool = False): 51 """Download the PANORAMA data. 52 53 Args: 54 path: Filepath to a folder where the data is downloaded for further processing. 55 download: Whether to download the data if it is not present. 56 """ 57 data_path = os.path.join(path, "volumes") 58 label_path = os.path.join(path, "labels") 59 if os.path.exists(data_path) and os.path.exists(label_path): 60 return 61 62 os.makedirs(path, exist_ok=True) 63 64 print("PANORAMA is a large dataset. I might take a while to download the volumes and respective labels.") 65 66 # Download the label volumes. 67 subprocess.call( 68 ["git", "clone", "--quiet", "https://github.com/DIAGNijmegen/panorama_labels", label_path] 69 ) 70 71 def _move_batch_data_to_root(batch): 72 if batch in ["batch_3", "batch_4"]: 73 batch_dir = os.path.join(data_path, batch) 74 75 for fpath in glob(os.path.join(batch_dir, "*.nii.gz")): 76 shutil.move(src=fpath, dst=data_path) 77 78 if os.path.exists(batch_dir): 79 shutil.rmtree(batch_dir) 80 81 # Download the input volumes. 82 for batch in URLS.keys(): 83 zip_path = os.path.join(path, f"{batch}.zip") 84 util.download_source(path=zip_path, url=URLS[batch], download=download, checksum=CHECKSUMS[batch]) 85 util.unzip(zip_path=zip_path, dst=data_path) 86 _move_batch_data_to_root(batch) 87 88 89def get_panorama_paths( 90 path: Union[os.PathLike, str], 91 annotation_choice: Optional[Literal["manual", "automatic"]] = None, 92 download: bool = False 93) -> Tuple[List[str], List[str]]: 94 """Get paths to the PANORAMA data. 95 96 Args: 97 path: Filepath to a folder where the downloaded data will be saved. 98 annotation_choice: The source of annotation. 99 download: Whether to download the data if it is not present. 100 101 Returns: 102 List of filepaths for the image data. 103 List of filepaths for the label data. 
104 """ 105 get_panorama_data(path, download) 106 107 if annotation_choice is None: 108 annotation_choice = "*" 109 label_paths = natsorted(glob(os.path.join(path, "labels", f"{annotation_choice}_labels", "*.nii.gz"))) 110 raw_dir = os.path.join(path, "volumes") 111 raw_paths = [ 112 os.path.join(raw_dir, os.path.basename(fpath).replace(".nii.gz", "_0000.nii.gz")) for fpath in label_paths 113 ] 114 115 # NOTE: the label "100051_00001.nii.gz" returns the error: 'nibabel.filebasedimages.ImageFileError: Empty file' 116 # We simply do not consider the sample (and correspondign labels) for the dataset. 117 for rpath, lpath in zip(raw_paths, label_paths): 118 if rpath.find("100051_00001") != -1: 119 raw_paths.remove(rpath) 120 121 if lpath.find("100051_00001") != -1: 122 label_paths.remove(lpath) 123 124 assert len(raw_paths) == len(label_paths) 125 126 return raw_paths, label_paths 127 128 129def get_panorama_dataset( 130 path: Union[os.PathLike, str], 131 patch_shape: Tuple[int, ...], 132 annotation_choice: Optional[Literal["manual", "automatic"]] = None, 133 resize_inputs: bool = False, 134 download: bool = False, **kwargs 135) -> Dataset: 136 """Get the PANORAMA dataset for pancreatic lesion (and other structures) segmentation. 137 138 Args: 139 path: Filepath to a folder where the downloaded data will be saved. 140 patch_shape: The patch shape to use for training. 141 annotation_choice: The source of annotation. 142 resize_inputs: Whether to resize inputs to the desired patch shape. 143 download: Whether to download the data if it is not present. 144 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 145 146 Returns: 147 The segmentation dataset. 148 """ 149 raw_paths, label_paths = get_panorama_paths(path, annotation_choice, download) 150 151 if resize_inputs: 152 resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False} 153 kwargs, patch_shape = util.update_kwargs_for_resize_trafo( 154 kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs 155 ) 156 157 return torch_em.default_segmentation_dataset( 158 raw_paths=raw_paths, 159 raw_key="data", 160 label_paths=label_paths, 161 label_key="data", 162 is_seg_dataset=True, 163 patch_shape=patch_shape, 164 **kwargs 165 ) 166 167 168def get_panorama_loader( 169 path: Union[os.PathLike, str], 170 batch_size: int, 171 patch_shape: Tuple[int, ...], 172 annotation_choice: Optional[Literal["manual", "automatic"]] = None, 173 resize_inputs: bool = False, 174 download: bool = False, 175 **kwargs 176) -> DataLoader: 177 """Get the PANORAMA dataloader for pancreatic lesion (and other structures) segmentation. 178 179 Args: 180 path: Filepath to a folder where the downloaded data will be saved. 181 batch_size: The batch size for training. 182 patch_shape: The patch shape to use for training. 183 annotation_choice: The source of annotation. 184 resize_inputs: Whether to resize inputs to the desired patch shape. 185 download: Whether to download the data if it is not present. 186 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 187 188 Returns: 189 The DataLoader. 190 """ 191 ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) 192 dataset = get_panorama_dataset(path, patch_shape, annotation_choice, resize_inputs, download, **ds_kwargs) 193 return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
get_panorama_data(path: Union[os.PathLike, str], download: bool = False)
Download the PANORAMA data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
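A minimal usage sketch. The target folder is an arbitrary example, and `git` must be available on the PATH, since the labels are fetched via `git clone`.

from torch_em.data.datasets.medical.panorama import get_panorama_data

# Download the CT volumes and clone the label repository into the given folder.
# "./data/panorama" is an example location; the dataset is large, so pick a
# location with sufficient free space.
get_panorama_data(path="./data/panorama", download=True)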
get_panorama_paths(path: Union[os.PathLike, str], annotation_choice: Optional[Literal["manual", "automatic"]] = None, download: bool = False) -> Tuple[List[str], List[str]]
Get paths to the PANORAMA data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- annotation_choice: The source of annotation.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data.
List of filepaths for the label data.
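A minimal usage sketch; the path and the choice of the manually annotated subset are example values.

from torch_em.data.datasets.medical.panorama import get_panorama_paths

# Collect matching image/label filepaths, restricted to the manual annotations.
raw_paths, label_paths = get_panorama_paths(
    path="./data/panorama", annotation_choice="manual", download=True
)
print(f"Found {len(raw_paths)} image-label pairs.")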
get_panorama_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], annotation_choice: Optional[Literal["manual", "automatic"]] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> Dataset
Get the PANORAMA dataset for pancreatic lesion (and other structures) segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- annotation_choice: The source of annotation.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
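A minimal usage sketch; the download folder and the 3D patch shape are arbitrary example values.

from torch_em.data.datasets.medical.panorama import get_panorama_dataset

dataset = get_panorama_dataset(
    path="./data/panorama",        # example download folder
    patch_shape=(32, 256, 256),    # example 3D patch shape
    annotation_choice="manual",
    download=True,
)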
get_panorama_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], annotation_choice: Optional[Literal["manual", "automatic"]] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> DataLoader
Get the PANORAMA dataloader for pancreatic lesion (and other structures) segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- annotation_choice: The source of annotation.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.
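A minimal usage sketch; batch size, patch shape and the extra DataLoader arguments (`num_workers`, `shuffle`) are example values. The extra arguments are split off by `util.split_kwargs` and forwarded to the PyTorch DataLoader.

from torch_em.data.datasets.medical.panorama import get_panorama_loader

loader = get_panorama_loader(
    path="./data/panorama",        # example download folder
    batch_size=2,                  # example batch size
    patch_shape=(32, 256, 256),    # example 3D patch shape
    annotation_choice="manual",
    download=True,
    num_workers=4,                 # forwarded to the PyTorch DataLoader
    shuffle=True,                  # forwarded to the PyTorch DataLoader
)

# Inspect a single batch of raw and label patches.
raw, labels = next(iter(loader))
print(raw.shape, labels.shape)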