torch_em.data.datasets.medical.panorama
The PANORAMA dataset contains annotations for PDAC lesions, veins, arteries, pancreas parenchyma, pancreatic duct and common bile duct segmentation in CT scans.
The dataset is from the PANORAMA challenge: https://panorama.grand-challenge.org/.
NOTE: The latest information on the label legend is located at: https://github.com/DIAGNijmegen/panorama_labels#label-legend. The label legend is as follows:
- background: 0
- PDAC lesion: 1
- veins: 2
- arteries: 3
- pancreas parenchyma: 4
- pancreatic duct: 5
- common bile duct: 6
This dataset is from the article: https://doi.org/10.5281/zenodo.10599559. Please cite it if you use this dataset in your research.
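For convenience, the label legend above can be written down as a plain Python mapping. This is an illustrative sketch; the name PANORAMA_LABELS is not an identifier exported by torch_em.

# Label IDs as used in the .nii.gz label volumes.
# NOTE: illustrative name only, not part of torch_em.
PANORAMA_LABELS = {
    0: "background",
    1: "PDAC lesion",
    2: "veins",
    3: "arteries",
    4: "pancreas parenchyma",
    5: "pancreatic duct",
    6: "common bile duct",
}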
1"""The PANORAMA dataset contains annotation for PDAC lesion, veins, arteries, pancreas parenchyma, 2pancreatic duct and common bile duct segmentation in CT scans. 3 4The dataset is from the PANORAMA challenge: https://panorama.grand-challenge.org/. 5 6NOTE: The latest information for the label legends are located at: 7https://github.com/DIAGNijmegen/panorama_labels#label-legend. 8The label legends are described as follows: 9- background: 0 10- PDAC lesion: 1 11- veins: 2 12- arteries: 3 13- pancreas parenchyma: 4 14- pancreatic duct: 5 15- common bile duct: 6 16 17This dataset is from the article: https://doi.org/10.5281/zenodo.10599559 18Please cite it if you use this dataset in your research. 19""" 20 21import os 22import shutil 23import subprocess 24from glob import glob 25from natsort import natsorted 26from typing import Union, Tuple, Optional, Literal, List 27 28from torch.utils.data import Dataset, DataLoader 29 30import torch_em 31 32from .. import util 33 34 35URLS = { 36 "batch_1": "https://zenodo.org/records/13715870/files/batch_1.zip", 37 "batch_2": "https://zenodo.org/records/13742336/files/batch_2.zip", 38 "batch_3": "https://zenodo.org/records/11034011/files/batch_3.zip", 39 "batch_4": "https://zenodo.org/records/10999754/files/batch_4.zip", 40} 41 42CHECKSUMS = { 43 "batch_1": "aff39b6347650d6c7457adf7a04bfb0a651ab6ecd33676ff109bdab17bc41cff", 44 "batch_2": "db6353a2c1c565c8bf084bd4fe1512fd6020b7675a1c9ab61b9a13d72a9fe76c", 45 "batch_3": "c1d71b40948edc36f795a7801cc79000082df8d365c48574af50b36516d64cee", 46 "batch_4": "3b5341af79c2cc8b8a9fa3ab7a6cfa8fedf694538a3d6be97c18e5c82be4d9d8", 47} 48 49 50def get_panorama_data(path: Union[os.PathLike, str], download: bool = False): 51 """Download the PANORAMA data. 52 53 Args: 54 path: Filepath to a folder where the data is downloaded for further processing. 55 download: Whether to download the data if it is not present. 56 """ 57 data_path = os.path.join(path, "volumes") 58 label_path = os.path.join(path, "labels") 59 if os.path.exists(data_path) and os.path.exists(label_path): 60 return 61 62 os.makedirs(path, exist_ok=True) 63 64 print("PANORAMA is a large dataset. I might take a while to download the volumes and respective labels.") 65 66 # Download the label volumes. 67 subprocess.call( 68 ["git", "clone", "--quiet", "https://github.com/DIAGNijmegen/panorama_labels", label_path] 69 ) 70 71 def _move_batch_data_to_root(batch): 72 if batch in ["batch_3", "batch_4"]: 73 batch_dir = os.path.join(data_path, batch) 74 75 for fpath in glob(os.path.join(batch_dir, "*.nii.gz")): 76 shutil.move(src=fpath, dst=data_path) 77 78 if os.path.exists(batch_dir): 79 shutil.rmtree(batch_dir) 80 81 # Download the input volumes. 82 for batch in URLS.keys(): 83 zip_path = os.path.join(path, f"{batch}.zip") 84 util.download_source(path=zip_path, url=URLS[batch], download=download, checksum=CHECKSUMS[batch]) 85 util.unzip(zip_path=zip_path, dst=data_path) 86 _move_batch_data_to_root(batch) 87 88 89def get_panorama_paths( 90 path: Union[os.PathLike, str], 91 annotation_choice: Optional[Literal["manual", "automatic"]] = None, 92 download: bool = False 93) -> Tuple[List[str], List[str]]: 94 """Get paths to the PANORAMA data. 95 96 Args: 97 path: Filepath to a folder where the downloaded data will be saved. 98 annotation_choice: The source of annotation. 99 download: Whether to download the data if it is not present. 100 101 Returns: 102 List of filepaths for the image data. 103 List of filepaths for the label data. 
104 """ 105 get_panorama_data(path, download) 106 107 if annotation_choice is None: 108 annotation_choice = "*" 109 label_paths = natsorted(glob(os.path.join(path, "labels", f"{annotation_choice}_labels", "*.nii.gz"))) 110 raw_dir = os.path.join(path, "volumes") 111 raw_paths = [ 112 os.path.join(raw_dir, os.path.basename(fpath).replace(".nii.gz", "_0000.nii.gz")) for fpath in label_paths 113 ] 114 115 # NOTE: the label "100051_00001.nii.gz" returns the error: 'nibabel.filebasedimages.ImageFileError: Empty file' 116 # We simply do not consider the sample (and correspondign labels) for the dataset. 117 for rpath, lpath in zip(raw_paths, label_paths): 118 if rpath.find("100051_00001") != -1: 119 raw_paths.remove(rpath) 120 121 if lpath.find("100051_00001") != -1: 122 label_paths.remove(lpath) 123 124 assert len(raw_paths) == len(label_paths) 125 126 return raw_paths, label_paths 127 128 129def get_panorama_dataset( 130 path: Union[os.PathLike, str], 131 patch_shape: Tuple[int, ...], 132 annotation_choice: Optional[Literal["manual", "automatic"]] = None, 133 resize_inputs: bool = False, 134 download: bool = False, **kwargs 135) -> Dataset: 136 """Get the PANORAMA dataset for pancreatic lesion (and other structures) segmentation. 137 138 Args: 139 path: Filepath to a folder where the downloaded data will be saved. 140 patch_shape: The patch shape to use for training. 141 annotation_choice: The source of annotation. 142 resize_inputs: Whether to resize inputs to the desired patch shape. 143 download: Whether to download the data if it is not present. 144 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 145 146 Returns: 147 The segmentation dataset. 148 """ 149 raw_paths, label_paths = get_panorama_paths(path, annotation_choice, download) 150 151 if resize_inputs: 152 resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False} 153 kwargs, patch_shape = util.update_kwargs_for_resize_trafo( 154 kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs 155 ) 156 157 return torch_em.default_segmentation_dataset( 158 raw_paths=raw_paths, 159 raw_key="data", 160 label_paths=label_paths, 161 label_key="data", 162 is_seg_dataset=True, 163 patch_shape=patch_shape, 164 **kwargs 165 ) 166 167 168def get_panorama_loader( 169 path: Union[os.PathLike, str], 170 batch_size: int, 171 patch_shape: Tuple[int, ...], 172 annotation_choice: Optional[Literal["manual", "automatic"]] = None, 173 resize_inputs: bool = False, 174 download: bool = False, 175 **kwargs 176) -> DataLoader: 177 """Get the PANORAMA dataloader for pancreatic lesion (and other structures) segmentation. 178 179 Args: 180 path: Filepath to a folder where the downloaded data will be saved. 181 batch_size: The batch size for training. 182 patch_shape: The patch shape to use for training. 183 annotation_choice: The source of annotation. 184 resize_inputs: Whether to resize inputs to the desired patch shape. 185 download: Whether to download the data if it is not present. 186 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 187 188 Returns: 189 The DataLoader. 190 """ 191 ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) 192 dataset = get_panorama_dataset(path, patch_shape, annotation_choice, resize_inputs, download, **ds_kwargs) 193 return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
get_panorama_data(path: Union[os.PathLike, str], download: bool = False)
Download the PANORAMA data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
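A minimal usage sketch. The target folder is an arbitrary example, and `git` must be available on the PATH, since the labels are fetched via `git clone`.

from torch_em.data.datasets.medical.panorama import get_panorama_data

# Download the CT volumes and clone the label repository into the given folder.
# "./data/panorama" is an example location; the dataset is large, so pick a
# location with sufficient free space.
get_panorama_data(path="./data/panorama", download=True)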
get_panorama_paths(path: Union[os.PathLike, str], annotation_choice: Optional[Literal["manual", "automatic"]] = None, download: bool = False) -> Tuple[List[str], List[str]]
Get paths to the PANORAMA data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- annotation_choice: The source of annotation.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data.
List of filepaths for the label data.
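A minimal usage sketch; the path and the choice of the manually annotated subset are example values.

from torch_em.data.datasets.medical.panorama import get_panorama_paths

# Collect matching image/label filepaths, restricted to the manual annotations.
raw_paths, label_paths = get_panorama_paths(
    path="./data/panorama", annotation_choice="manual", download=True
)
print(f"Found {len(raw_paths)} image-label pairs.")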
get_panorama_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], annotation_choice: Optional[Literal["manual", "automatic"]] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> Dataset
Get the PANORAMA dataset for pancreatic lesion (and other structures) segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- annotation_choice: The source of annotation.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
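A minimal usage sketch; the download folder and the 3D patch shape are arbitrary example values.

from torch_em.data.datasets.medical.panorama import get_panorama_dataset

dataset = get_panorama_dataset(
    path="./data/panorama",        # example download folder
    patch_shape=(32, 256, 256),    # example 3D patch shape
    annotation_choice="manual",
    download=True,
)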
get_panorama_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], annotation_choice: Optional[Literal["manual", "automatic"]] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> DataLoader
Get the PANORAMA dataloader for pancreatic lesion (and other structures) segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- annotation_choice: The source of annotation.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.
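A minimal usage sketch; batch size, patch shape and the extra DataLoader arguments (`num_workers`, `shuffle`) are example values. The extra arguments are split off by `util.split_kwargs` and forwarded to the PyTorch DataLoader.

from torch_em.data.datasets.medical.panorama import get_panorama_loader

loader = get_panorama_loader(
    path="./data/panorama",        # example download folder
    batch_size=2,                  # example batch size
    patch_shape=(32, 256, 256),    # example 3D patch shape
    annotation_choice="manual",
    download=True,
    num_workers=4,                 # forwarded to the PyTorch DataLoader
    shuffle=True,                  # forwarded to the PyTorch DataLoader
)

# Inspect a single batch of raw and label patches.
raw, labels = next(iter(loader))
print(raw.shape, labels.shape)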