torch_em.data.datasets.medical.ct_cadaiver

The CadAIver dataset contains annotations for vertebrae segmentation in CT scans.

The dataset is located at https://zenodo.org/records/10053317. This dataset is from the publication https://doi.org/10.1038/s41597-024-03191-6. Please cite it if you use this dataset for your research.

  1"""The CadAIver dataset contains annotations for vertebrae segmentation
  2in CT scans.
  3
  4The dataset is located at https://zenodo.org/records/10053317.
  5This dataset is from the publication https://doi.org/10.1038/s41597-024-03191-6.
  6Please cite it if you use this dataset for your research.
  7"""
  8
  9import os
 10from glob import glob
 11from natsort import natsorted
 12from typing import Union, Tuple, List
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
 21URL = "https://zenodo.org/records/10053317/files/CadAIver%20study.zip"  # Zenodo link to the "CadAIver study" zip archive.
 22CHECKSUM = ""  # No expected checksum recorded. NOTE(review): confirm how `util.download_source` treats an empty string.
 23
 24
 25def get_ct_cadaiver_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 26    """Download the CadAIver dataset.
 27
 28    Args:
 29        path: Filepath to a folder where the data is downloaded for further processing.
 30        download: Whether to download the data if it is not present.
 31
 32    Returns:
 33        Filepath where the data is downloaded.
 34    """
 35    data_dir = os.path.join(path, "CadAIver study")
 36    if os.path.exists(data_dir):
 37        return data_dir
 38
 39    os.makedirs(path, exist_ok=True)
 40
 41    zip_path = os.path.join(path, "CadAIver study.zip")
 42    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 43    util.unzip(zip_path=zip_path, dst=path)
 44
 45    return data_dir
 46
 47
 48def get_ct_cadaiver_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[int], List[int]]:
 49    """Get paths to the CadAIver data.
 50
 51    Args:
 52        path: Filepath to a folder where the downloaded data is stored.
 53        download: Whether to download the data if it is not present.
 54
 55    Returns:
 56        List of filepaths for the image data.
 57        List of filepaths for the label data.
 58    """
 59    data_dir = get_ct_cadaiver_data(path, download)
 60
 61    raw_paths = natsorted(glob(os.path.join(data_dir, "Images", "*.nii.gz")))
 62    label_paths = [p.replace("Images", "Segmentations") for p in raw_paths]
 63    label_paths = [p.replace(".nii.gz", "_seg.nii.gz") for p in label_paths]
 64
 65    assert len(raw_paths) == len(label_paths)
 66
 67    return raw_paths, label_paths
 68
 69
 70def get_ct_cadaiver_dataset(
 71    path: Union[os.PathLike, str],
 72    patch_shape: Tuple[int, ...],
 73    resize_inputs: bool = False,
 74    download: bool = False,
 75    **kwargs
 76) -> Dataset:
 77    """Get the CadAIver dataset for vertebrae segmentation.
 78
 79    Args:
 80        path: Filepath to a folder where the data is downloaded for further processing.
 81        patch_shape: The patch shape to use for training.
 82        resize_inputs: Whether to resize inputs to the desired patch shape.
 83        download: Whether to download the data if it is not present.
 84        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 85
 86    Returns:
 87        The segmentation dataset.
 88    """
 89    raw_paths, label_paths = get_ct_cadaiver_paths(path, download)
 90
 91    if resize_inputs:
 92        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
 93        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
 94            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
 95        )
 96
 97    return torch_em.default_segmentation_dataset(
 98        raw_paths=raw_paths,
 99        raw_key="data",
100        label_paths=label_paths,
101        label_key="data",
102        is_seg_dataset=True,
103        patch_shape=patch_shape,
104        **kwargs
105    )
106
107
def get_ct_cadaiver_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CadAIver dataloader for vertebrae segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the ones for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_ct_cadaiver_dataset(
        path=path,
        patch_shape=patch_shape,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)
# Zenodo link to the "CadAIver study" zip archive.
URL = 'https://zenodo.org/records/10053317/files/CadAIver%20study.zip'
# No expected checksum is recorded for the archive.
# NOTE(review): confirm how `util.download_source` treats an empty checksum string.
CHECKSUM = ''
def get_ct_cadaiver_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the CadAIver data is available locally and return its folder.

    Args:
        path: Filepath to the folder in which the archive is stored and extracted.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the "CadAIver study" folder inside `path`.
    """
    extracted_dir = os.path.join(path, "CadAIver study")

    # Fetching and unpacking only happens while the extracted folder is still missing.
    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)
        archive_path = os.path.join(path, "CadAIver study.zip")
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive_path, dst=path)

    return extracted_dir

Download the CadAIver dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def get_ct_cadaiver_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the CadAIver data.

    Args:
        path: Filepath to a folder where the downloaded data is stored.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        FileNotFoundError: If an image has no matching label file.
    """
    data_dir = get_ct_cadaiver_data(path, download)
    label_dir = os.path.join(data_dir, "Segmentations")
    extension = ".nii.gz"

    raw_paths = natsorted(glob(os.path.join(data_dir, "Images", "*" + extension)))

    # Derive each label path from the image file name only. Rewriting the full path with
    # `str.replace` would also alter any parent folder whose name contains "Images" or ".nii.gz".
    label_paths = [
        os.path.join(label_dir, os.path.basename(raw_path)[:-len(extension)] + "_seg" + extension)
        for raw_path in raw_paths
    ]

    # The label list is built from the image list, so comparing their lengths can never fail.
    # Check that the label files are actually there instead.
    missing = [label_path for label_path in label_paths if not os.path.exists(label_path)]
    if missing:
        raise FileNotFoundError(f"Could not find the label file(s) for {len(missing)} image(s), e.g. '{missing[0]}'.")

    return raw_paths, label_paths

Get paths to the CadAIver data.

Arguments:
  • path: Filepath to a folder where the downloaded data is stored.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_ct_cadaiver_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the CadAIver dataset for vertebrae segmentation in CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, segmentation_paths = get_ct_cadaiver_paths(path, download)

    if resize_inputs:
        # The helper hands back the updated keyword arguments and the patch shape to use from here on.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    # Images and labels are both read with the key "data".
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key="data",
        label_paths=segmentation_paths,
        label_key="data",
        is_seg_dataset=True,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset

Get the CadAIver dataset for vertebrae segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • resize_inputs: Whether to resize inputs to the desired patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_ct_cadaiver_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CadAIver dataloader for vertebrae segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the ones for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_ct_cadaiver_dataset(
        path=path,
        patch_shape=patch_shape,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)

Get the CadAIver dataloader for vertebrae segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • resize_inputs: Whether to resize inputs to the desired patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.