torch_em.data.datasets.medical.autopet

The AutoPET dataset contains annotations for lesion segmentation in whole-body FDG-PET/CT scans.

This dataset is from the AutoPET II - Automated Lesion Segmentation in PET/CT - Domain Generalization challenge. Link: https://autopet-ii.grand-challenge.org/

Please cite it if you use this dataset for publication.

  1"""The AutoPET dataset contains annotations for lesion segmentation in whole-body FDG-PET/CT scans.
  2
  3This dataset is from the `AutoPET II - Automated Lesion Segmentation in PET/CT - Domain Generalization` challenge.
  4Link: https://autopet-ii.grand-challenge.org/
  5
  6Please cite it if you use this dataset for publication.
  7"""
  8
  9import os
 10from glob import glob
 11from typing import Tuple, Optional, Union, Literal, List
 12
 13from torch.utils.data import Dataset, DataLoader
 14
 15import torch_em
 16
 17from .. import util
 18
 19
# URL of the zip archive that `get_autopet_data` downloads (NIfTI data, judging by the file name).
AUTOPET_DATA = "http://193.196.20.155/data/autoPET/data/nifti.zip"
# Checksum handed to `util.download_source` together with the URL;
# presumably used there to verify the downloaded archive - TODO confirm the hash algorithm.
CHECKSUM = "0ac2186ea6d936ff41ce605c6a9588aeb20f031085589897dbab22fc82a12972"
 22
 23
 24def get_autopet_data(path: Union[os.PathLike, str], download: bool = False):
 25    """Download the AutoPET dataset.
 26
 27    Args:
 28        path: Filepath to a folder where the data is downloaded for further processing.
 29        download: Whether to download the data if it is not present.
 30
 31    Returns:
 32        Filepath where the data is downloaded.
 33    """
 34    target_dir = os.path.join(path, "AutoPET-II")
 35    if os.path.exists(target_dir):
 36        return
 37
 38    os.makedirs(target_dir)
 39
 40    zip_path = os.path.join(path, "autopet.zip")
 41    print("The AutoPET data is not available yet and will be downloaded.")
 42    print("Note that this dataset is large, so this step can take several hours (depending on your internet).")
 43    util.download_source(path=zip_path, url=AUTOPET_DATA, download=download, checksum=CHECKSUM)
 44    util.unzip(zip_path, target_dir, remove=False)
 45
 46
 47def get_autopet_paths(
 48    path: Union[os.PathLike, str], modality: Optional[Literal["CT", "PET"]] = None, download: bool = False,
 49) -> Tuple[List[str], List[str]]:
 50    """Get paths to the AutoPET adta.
 51
 52    Args:
 53        path: Filepath to a folder where the data is downloaded for further processing.
 54        modality: The choice of imaging modality.
 55        download: Whether to download the data if it is not present.
 56
 57    Returns:
 58        List of filepaths for the image data.
 59        List of filepaths for the label data.
 60    """
 61    get_autopet_data(path, download)
 62
 63    root_dir = os.path.join(path, "AutoPET-II", "FDG-PET-CT-Lesions", "*", "*")
 64    ct_paths = sorted(glob(os.path.join(root_dir, "CTres.nii.gz")))
 65    pet_paths = sorted(glob(os.path.join(root_dir, "SUV.nii.gz")))
 66    label_paths = sorted(glob(os.path.join(root_dir, "SEG.nii.gz")))
 67
 68    if modality is None:
 69        raw_paths = [(ct_path, pet_path) for ct_path, pet_path in zip(ct_paths, pet_paths)]
 70    else:
 71        if modality == "CT":
 72            raw_paths = ct_paths
 73        elif modality == "PET":
 74            raw_paths = pet_paths
 75        else:
 76            raise ValueError("Choose from the available modalities: `CT` / `PET`")
 77
 78    return raw_paths, label_paths
 79
 80
 81def get_autopet_dataset(
 82    path: Union[os.PathLike, str],
 83    patch_shape: Tuple[int, ...],
 84    modality: Optional[Literal["CT", "PET"]] = None,
 85    resize_inputs: bool = False,
 86    download: bool = False,
 87    **kwargs
 88) -> Dataset:
 89    """Get the AutoPET dataset for lesion segmentation in whole-bod FDG-PET/CT scans.
 90
 91    Args:
 92        path: Filepath to a folder where the data is downloaded for further processing.
 93        patch_shape: The patch shape to use for training.
 94        modality: The choice of imaging modality.
 95        resize_inputs: Whether to resize the inputs.
 96        download: Whether to download the data if it is not present.
 97        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 98
 99    Returns:
100        The segmentation dataset.
101    """
102    raw_paths, label_paths = get_autopet_paths(path, modality, download)
103
104    if resize_inputs:
105        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
106        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
107            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
108        )
109
110    dataset = torch_em.default_segmentation_dataset(
111        raw_paths=raw_paths,
112        raw_key="data",
113        label_paths=label_paths,
114        label_key="data",
115        patch_shape=patch_shape,
116        with_channels=modality is None,
117        **kwargs
118    )
119
120    if "sampler" in kwargs:
121        for ds in dataset.datasets:
122            ds.max_sampling_attempts = 5000
123
124    return dataset
125
126
def get_autopet_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    modality: Optional[Literal["CT", "PET"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the AutoPET dataloader for lesion segmentation in whole-body FDG-PET/CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        modality: The choice of imaging modality.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining (loader) ones.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    autopet_dataset = get_autopet_dataset(
        path, patch_shape, modality, resize_inputs, download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(autopet_dataset, batch_size, **dataloader_kwargs)
    return loader
# URL of the zip archive that `get_autopet_data` downloads (NIfTI data, judging by the file name).
AUTOPET_DATA = 'http://193.196.20.155/data/autoPET/data/nifti.zip'
# Checksum handed to `util.download_source` together with the URL;
# presumably used there to verify the downloaded archive - TODO confirm the hash algorithm.
CHECKSUM = '0ac2186ea6d936ff41ce605c6a9588aeb20f031085589897dbab22fc82a12972'
def get_autopet_data(path: Union[os.PathLike, str], download: bool = False):
def get_autopet_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the AutoPET dataset.

    The archive is stored as `autopet.zip` inside `path` and extracted to the
    `AutoPET-II` subfolder. If that subfolder already exists nothing is downloaded.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    target_dir = os.path.join(path, "AutoPET-II")
    if os.path.exists(target_dir):
        return target_dir

    # Only the parent folder is created up front. The target folder doubles as the
    # "data is available" marker (see the check above), so it must not exist before the
    # download has succeeded - otherwise a failed or refused download would leave an
    # empty folder behind and all later calls would silently skip the download.
    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "autopet.zip")
    print("The AutoPET data is not available yet and will be downloaded.")
    print("Note that this dataset is large, so this step can take several hours (depending on your internet).")
    util.download_source(path=zip_path, url=AUTOPET_DATA, download=download, checksum=CHECKSUM)

    os.makedirs(target_dir, exist_ok=True)
    util.unzip(zip_path, target_dir, remove=False)
    return target_dir

Download the AutoPET dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def get_autopet_paths( path: Union[os.PathLike, str], modality: Optional[Literal['CT', 'PET']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_autopet_paths(
    path: Union[os.PathLike, str], modality: Optional[Literal["CT", "PET"]] = None, download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the AutoPET data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        modality: The choice of imaging modality. Either `CT` or `PET`.
            If `None`, both modalities are returned together.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data. If `modality` is `None`, each entry is a
            `(ct_path, pet_path)` tuple instead of a single filepath.
        List of filepaths for the label data.

    Raises:
        ValueError: If `modality` is not one of `None`, `CT` or `PET`.
    """
    # Validate the argument first: fetching the data can take hours, so an invalid
    # modality must be rejected before the download is triggered, not after it.
    if modality not in (None, "CT", "PET"):
        raise ValueError("Choose from the available modalities: `CT` / `PET`")

    get_autopet_data(path, download)

    # The files are searched two folder levels below `FDG-PET-CT-Lesions`.
    root_dir = os.path.join(path, "AutoPET-II", "FDG-PET-CT-Lesions", "*", "*")
    ct_paths = sorted(glob(os.path.join(root_dir, "CTres.nii.gz")))
    pet_paths = sorted(glob(os.path.join(root_dir, "SUV.nii.gz")))
    label_paths = sorted(glob(os.path.join(root_dir, "SEG.nii.gz")))

    if modality == "CT":
        raw_paths = ct_paths
    elif modality == "PET":
        raw_paths = pet_paths
    else:
        # No modality selected: pair the sorted CT and PET volumes.
        raw_paths = list(zip(ct_paths, pet_paths))

    return raw_paths, label_paths

Get paths to the AutoPET data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • modality: The choice of imaging modality.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_autopet_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], modality: Optional[Literal['CT', 'PET']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 82def get_autopet_dataset(
 83    path: Union[os.PathLike, str],
 84    patch_shape: Tuple[int, ...],
 85    modality: Optional[Literal["CT", "PET"]] = None,
 86    resize_inputs: bool = False,
 87    download: bool = False,
 88    **kwargs
 89) -> Dataset:
 90    """Get the AutoPET dataset for lesion segmentation in whole-bod FDG-PET/CT scans.
 91
 92    Args:
 93        path: Filepath to a folder where the data is downloaded for further processing.
 94        patch_shape: The patch shape to use for training.
 95        modality: The choice of imaging modality.
 96        resize_inputs: Whether to resize the inputs.
 97        download: Whether to download the data if it is not present.
 98        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 99
100    Returns:
101        The segmentation dataset.
102    """
103    raw_paths, label_paths = get_autopet_paths(path, modality, download)
104
105    if resize_inputs:
106        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
107        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
108            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
109        )
110
111    dataset = torch_em.default_segmentation_dataset(
112        raw_paths=raw_paths,
113        raw_key="data",
114        label_paths=label_paths,
115        label_key="data",
116        patch_shape=patch_shape,
117        with_channels=modality is None,
118        **kwargs
119    )
120
121    if "sampler" in kwargs:
122        for ds in dataset.datasets:
123            ds.max_sampling_attempts = 5000
124
125    return dataset

Get the AutoPET dataset for lesion segmentation in whole-body FDG-PET/CT scans.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • modality: The choice of imaging modality.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_autopet_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], modality: Optional[Literal['CT', 'PET']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_autopet_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    modality: Optional[Literal["CT", "PET"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the AutoPET dataloader for lesion segmentation in whole-body FDG-PET/CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        modality: The choice of imaging modality.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining (loader) ones.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    autopet_dataset = get_autopet_dataset(
        path, patch_shape, modality, resize_inputs, download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(autopet_dataset, batch_size, **dataloader_kwargs)
    return loader

Get the AutoPET dataloader for lesion segmentation in whole-body FDG-PET/CT scans.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • modality: The choice of imaging modality.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.