torch_em.data.datasets.medical.pengwin
The PENGWIN dataset contains annotations for pelvic bone fractures and fragments in CT and X-Ray images.
This dataset is from the challenge: https://pengwin.grand-challenge.org/pengwin/. This dataset is related to the publication: https://doi.org/10.1007/978-3-031-43996-4_30. Please cite them if you use this dataset for your publication.
"""The PENGWIN dataset contains annotations for pelvic bone fractures and
fragments in CT and X-Ray images.

This dataset is from the challenge: https://pengwin.grand-challenge.org/pengwin/.
This dataset is related to the publication: https://doi.org/10.1007/978-3-031-43996-4_30.
Please cite them if you use this dataset for your publication.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


# Download locations per modality. The CT inputs are split over two archives.
URLS = {
    "CT": [
        "https://zenodo.org/records/10927452/files/PENGWIN_CT_train_images_part1.zip",  # inputs part 1
        "https://zenodo.org/records/10927452/files/PENGWIN_CT_train_images_part2.zip",  # inputs part 2
        "https://zenodo.org/records/10927452/files/PENGWIN_CT_train_labels.zip",  # labels
    ],
    "X-Ray": ["https://zenodo.org/records/10913196/files/train.zip"]
}

# Checksums, in the same order as the entries of `URLS`.
CHECKSUMS = {
    "CT": [
        "e2e9f99798960607ffced1fbdeee75a626c41bf859eaf4125029a38fac6b7609",  # inputs part 1
        "19f3cdc5edd1daf9324c70f8ba683eed054f6ed8f2b1cc59dbd80724f8f0bbb2",  # inputs part 2
        "c4d3857e02d3ee5d0df6c8c918dd3cf5a7c9419135f1ec089b78215f37c6665c"  # labels
    ],
    "X-Ray": ["48d107979eb929a3c61da4e75566306a066408954cf132907bda570f2a7de725"]
}

# Extraction targets (relative to the data folder), in the same order as the entries of `URLS`.
TARGET_DIRS = {
    "CT": ["CT/images", "CT/images", "CT/labels"],
    "X-Ray": ["X-Ray"]
}

MODALITIES = ["CT", "X-Ray"]


def get_pengwin_data(
    path: Union[os.PathLike, str], modality: Literal["CT", "X-Ray"], download: bool = False
) -> str:
    """Download the PENGWIN dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        modality: The choice of modality for inputs. Either 'CT' or 'X-Ray'.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.

    Raises:
        ValueError: If `modality` is not one of the supported modalities.
    """
    # The negation must cover the whole conjunction: `not a and b` parses as `(not a) and b`,
    # which would never reject an unsupported modality string.
    if not (isinstance(modality, str) and modality in MODALITIES):
        raise ValueError(f"'{modality}' is not a valid modality. Please choose from {MODALITIES}.")

    data_dir = os.path.join(path, "data")
    # An existing modality folder is treated as "already downloaded and extracted".
    if os.path.exists(os.path.join(data_dir, modality)):
        return data_dir

    os.makedirs(path, exist_ok=True)

    # Each archive comes with its own checksum and extraction target.
    for url, checksum, dst_dir in zip(URLS[modality], CHECKSUMS[modality], TARGET_DIRS[modality]):
        zip_path = os.path.join(path, os.path.split(url)[-1])
        util.download_source(path=zip_path, url=url, download=download, checksum=checksum)
        util.unzip(zip_path=zip_path, dst=os.path.join(data_dir, dst_dir))

    return data_dir


def get_pengwin_paths(
    path: Union[os.PathLike, str], modality: Literal["CT", "X-Ray"], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the PENGWIN data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        modality: The choice of modality for inputs.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_pengwin_data(path=path, modality=modality, download=download)

    if modality == "CT":
        image_paths = natsorted(glob(os.path.join(data_dir, modality, "images", "*.mha")))
        gt_paths = natsorted(glob(os.path.join(data_dir, modality, "labels", "*.mha")))
    else:  # X-Ray
        base_dir = os.path.join(data_dir, modality, "train")
        image_paths = natsorted(glob(os.path.join(base_dir, "input", "images", "*.tif")))
        gt_paths = natsorted(glob(os.path.join(base_dir, "output", "images", "*.tif")))

    return image_paths, gt_paths


def get_pengwin_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    modality: Literal["CT", "X-Ray"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the PENGWIN dataset for pelvic fracture segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        modality: The choice of modality for inputs.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_pengwin_paths(path=path, modality=modality, download=download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )


def get_pengwin_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    modality: Literal["CT", "X-Ray"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the PENGWIN dataloader for pelvic fracture segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        modality: The choice of modality for inputs.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_pengwin_dataset(path, patch_shape, modality, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URLS =
{'CT': ['https://zenodo.org/records/10927452/files/PENGWIN_CT_train_images_part1.zip', 'https://zenodo.org/records/10927452/files/PENGWIN_CT_train_images_part2.zip', 'https://zenodo.org/records/10927452/files/PENGWIN_CT_train_labels.zip'], 'X-Ray': ['https://zenodo.org/records/10913196/files/train.zip']}
CHECKSUMS =
{'CT': ['e2e9f99798960607ffced1fbdeee75a626c41bf859eaf4125029a38fac6b7609', '19f3cdc5edd1daf9324c70f8ba683eed054f6ed8f2b1cc59dbd80724f8f0bbb2', 'c4d3857e02d3ee5d0df6c8c918dd3cf5a7c9419135f1ec089b78215f37c6665c'], 'X-Ray': ['48d107979eb929a3c61da4e75566306a066408954cf132907bda570f2a7de725']}
TARGET_DIRS =
{'CT': ['CT/images', 'CT/images', 'CT/labels'], 'X-Ray': ['X-Ray']}
MODALITIES =
['CT', 'X-Ray']
def
get_pengwin_data( path: Union[os.PathLike, str], modality: Literal['CT', 'X-Ray'], download: bool = False) -> str:
def get_pengwin_data(
    path: Union[os.PathLike, str], modality: Literal["CT", "X-Ray"], download: bool = False
) -> str:
    """Download the PENGWIN dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        modality: The choice of modality for inputs. Either 'CT' or 'X-Ray'.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.

    Raises:
        ValueError: If `modality` is not one of the supported modalities.
    """
    # The negation must cover the whole conjunction: `not a and b` parses as `(not a) and b`,
    # which would never reject an unsupported modality string.
    if not (isinstance(modality, str) and modality in MODALITIES):
        raise ValueError(f"'{modality}' is not a valid modality. Please choose from {MODALITIES}.")

    data_dir = os.path.join(path, "data")
    # An existing modality folder is treated as "already downloaded and extracted".
    if os.path.exists(os.path.join(data_dir, modality)):
        return data_dir

    os.makedirs(path, exist_ok=True)

    # Each archive comes with its own checksum and extraction target.
    for url, checksum, dst_dir in zip(URLS[modality], CHECKSUMS[modality], TARGET_DIRS[modality]):
        zip_path = os.path.join(path, os.path.split(url)[-1])
        util.download_source(path=zip_path, url=url, download=download, checksum=checksum)
        util.unzip(zip_path=zip_path, dst=os.path.join(data_dir, dst_dir))

    return data_dir
Download the PENGWIN dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- modality: The choice of modality for inputs.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def
get_pengwin_paths( path: Union[os.PathLike, str], modality: Literal['CT', 'X-Ray'], download: bool = False) -> Tuple[List[str], List[str]]:
def get_pengwin_paths(
    path: Union[os.PathLike, str], modality: Literal["CT", "X-Ray"], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Collect the image and label filepaths of the PENGWIN data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        modality: The choice of modality for inputs.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    modality_dir = os.path.join(get_pengwin_data(path=path, modality=modality, download=download), modality)

    # Work out the glob patterns first; both modalities are then collected the same way.
    if modality == "CT":
        image_pattern = os.path.join(modality_dir, "images", "*.mha")
        label_pattern = os.path.join(modality_dir, "labels", "*.mha")
    else:  # X-Ray
        train_dir = os.path.join(modality_dir, "train")
        image_pattern = os.path.join(train_dir, "input", "images", "*.tif")
        label_pattern = os.path.join(train_dir, "output", "images", "*.tif")

    # Both lists are natural-sorted by filename.
    return natsorted(glob(image_pattern)), natsorted(glob(label_pattern))
Get paths to the PENGWIN data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- modality: The choice of modality for inputs.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_pengwin_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], modality: Literal['CT', 'X-Ray'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_pengwin_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    modality: Literal["CT", "X-Ray"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the PENGWIN segmentation dataset for pelvic fracture segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        modality: The choice of modality for inputs.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_pengwin_paths(path=path, modality=modality, download=download)

    if resize_inputs:
        # The helper returns updated dataset kwargs together with the patch shape to use.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
Get the PENGWIN dataset for pelvic fracture segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- modality: The choice of modality for inputs.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_pengwin_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], modality: Literal['CT', 'X-Ray'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_pengwin_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    modality: Literal["CT", "X-Ray"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the PENGWIN dataloader for pelvic fracture segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        modality: The choice of modality for inputs.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    pengwin_dataset = get_pengwin_dataset(
        path=path,
        patch_shape=patch_shape,
        modality=modality,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(pengwin_dataset, batch_size, **dataloader_kwargs)
Get the PENGWIN dataloader for pelvic fracture segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- modality: The choice of modality for inputs.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.