torch_em.data.datasets.medical.ct_cadaiver
The CadAIver dataset contains annotations for vertebrae segmentation in CT scans.
The dataset is located at https://zenodo.org/records/10053317. This dataset is from the publication https://doi.org/10.1038/s41597-024-03191-6. Please cite it if you use this dataset for your research.
"""The CadAIver dataset contains annotations for vertebrae segmentation
in CT scans.

The dataset is located at https://zenodo.org/records/10053317.
This dataset is from the publication https://doi.org/10.1038/s41597-024-03191-6.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://zenodo.org/records/10053317/files/CadAIver%20study.zip"
# NOTE(review): the checksum is empty; how `util.download_source` treats an empty
# checksum is not visible from this module -- confirm and fill in the real value.
CHECKSUM = ""


def get_ct_cadaiver_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the CadAIver dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    data_dir = os.path.join(path, "CadAIver study")
    if os.path.exists(data_dir):
        # The extracted folder already exists, nothing to download or unzip.
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "CadAIver study.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def get_ct_cadaiver_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the CadAIver data.

    Args:
        path: Filepath to a folder where the downloaded data is stored.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_ct_cadaiver_data(path, download)

    raw_paths = natsorted(glob(os.path.join(data_dir, "Images", "*.nii.gz")))

    # Build each label path from the image file name only. A plain `str.replace` on
    # the full path would also rewrite "Images" or ".nii.gz" occurring in parent folders.
    suffix = ".nii.gz"
    label_dir = os.path.join(data_dir, "Segmentations")
    label_paths = [
        os.path.join(label_dir, os.path.basename(p)[:-len(suffix)] + "_seg" + suffix) for p in raw_paths
    ]

    return raw_paths, label_paths


def get_ct_cadaiver_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the CadAIver dataset for vertebrae segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_ct_cadaiver_paths(path, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key="data",
        label_paths=label_paths,
        label_key="data",
        is_seg_dataset=True,
        patch_shape=patch_shape,
        **kwargs
    )


def get_ct_cadaiver_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CadAIver dataloader for vertebrae segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_ct_cadaiver_dataset(path, patch_shape, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://zenodo.org/records/10053317/files/CadAIver%20study.zip'
CHECKSUM =
''
def
get_ct_cadaiver_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_ct_cadaiver_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the CadAIver data is available under `path`.

    If the "CadAIver study" folder is missing, the archive is fetched via
    `util.download_source` and extracted into `path` via `util.unzip`.

    Args:
        path: Filepath to the folder that holds the archive and the extracted data.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the "CadAIver study" folder inside `path`.
    """
    data_dir = os.path.join(path, "CadAIver study")

    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "CadAIver study.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return data_dir
Download the CadAIver dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def
get_ct_cadaiver_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[int], List[int]]:
def get_ct_cadaiver_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the CadAIver data.

    Images are collected from the "Images" subfolder of the extracted data. The label
    path for each image points to the "Segmentations" subfolder and carries the image
    file name with "_seg" inserted before the ".nii.gz" extension.

    Args:
        path: Filepath to a folder where the downloaded data is stored.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_ct_cadaiver_data(path, download)

    raw_paths = natsorted(glob(os.path.join(data_dir, "Images", "*.nii.gz")))

    # Build each label path from the image file name only. A plain `str.replace` on
    # the full path would also rewrite "Images" or ".nii.gz" occurring in parent folders.
    suffix = ".nii.gz"
    label_dir = os.path.join(data_dir, "Segmentations")
    label_paths = [
        os.path.join(label_dir, os.path.basename(p)[:-len(suffix)] + "_seg" + suffix) for p in raw_paths
    ]

    return raw_paths, label_paths
Get paths to the CadAIver data.
Arguments:
- path: Filepath to a folder where the downloaded data is stored.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_ct_cadaiver_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_ct_cadaiver_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the CadAIver segmentation dataset for vertebrae segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, seg_paths = get_ct_cadaiver_paths(path, download)

    if resize_inputs:
        # The helper returns the updated dataset kwargs and patch shape for resizing.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key="data",
        label_paths=seg_paths,
        label_key="data",
        is_seg_dataset=True,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
Get the CadAIver dataset for vertebrae segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_ct_cadaiver_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_ct_cadaiver_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the CadAIver dataloader for vertebrae segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the remaining loader arguments.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_ct_cadaiver_dataset(path, patch_shape, resize_inputs, download, **dataset_kwargs)
    loader = torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)
    return loader
Get the CadAIver dataloader for vertebrae segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.