torch_em.data.datasets.medical.autopet
The AutoPET dataset contains annotations for lesion segmentation in whole-body FDG-PET/CT scans.
This dataset is from the AutoPET II - Automated Lesion Segmentation in PET/CT - Domain Generalization
challenge.
Link: https://autopet-ii.grand-challenge.org/
Please cite it if you use this dataset for publication.
"""The AutoPET dataset contains annotations for lesion segmentation in whole-body FDG-PET/CT scans.

This dataset is from the `AutoPET II - Automated Lesion Segmentation in PET/CT - Domain Generalization` challenge.
Link: https://autopet-ii.grand-challenge.org/

Please cite it if you use this dataset for publication.
"""

import os
from glob import glob
from typing import Tuple, Optional, Union, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


AUTOPET_DATA = "http://193.196.20.155/data/autoPET/data/nifti.zip"
CHECKSUM = "0ac2186ea6d936ff41ce605c6a9588aeb20f031085589897dbab22fc82a12972"


def get_autopet_data(path: Union[os.PathLike, str], download: bool = False):
    """Download and unpack the AutoPET dataset.

    If the folder `<path>/AutoPET-II` already exists, the data is treated as present and nothing is done.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the folder the data is extracted to (`<path>/AutoPET-II`).
    """
    target_dir = os.path.join(path, "AutoPET-II")
    if os.path.exists(target_dir):
        return target_dir

    # The archive is stored directly in `path`, so that folder has to exist before downloading.
    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "autopet.zip")
    print("The AutoPET data is not available yet and will be downloaded.")
    print("Note that this dataset is large, so this step can take several hours (depending on your internet).")
    util.download_source(path=zip_path, url=AUTOPET_DATA, download=download, checksum=CHECKSUM)

    # Create the target folder only after the archive is available. Its existence is what the early
    # return above tests, so creating it before a download that raises would make every later call
    # skip the download and continue without any data.
    os.makedirs(target_dir, exist_ok=True)
    util.unzip(zip_path, target_dir, remove=False)

    return target_dir


def get_autopet_paths(
    path: Union[os.PathLike, str], modality: Optional[Literal["CT", "PET"]] = None, download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the AutoPET data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        modality: The choice of imaging modality. Either "CT" or "PET";
            with `None` each image entry is a `(ct_path, pet_path)` tuple.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `modality` is not one of `None`, "CT" or "PET".
        RuntimeError: If both modalities are requested and the number of CT and PET volumes differs.
    """
    # Reject an unsupported modality before touching the data, the download step can take hours.
    if modality not in (None, "CT", "PET"):
        raise ValueError("Choose from the available modalities: `CT` / `PET`")

    get_autopet_data(path, download)

    root_dir = os.path.join(path, "AutoPET-II", "FDG-PET-CT-Lesions", "*", "*")
    ct_paths = sorted(glob(os.path.join(root_dir, "CTres.nii.gz")))
    pet_paths = sorted(glob(os.path.join(root_dir, "SUV.nii.gz")))
    label_paths = sorted(glob(os.path.join(root_dir, "SEG.nii.gz")))

    if modality == "CT":
        raw_paths = ct_paths
    elif modality == "PET":
        raw_paths = pet_paths
    else:
        # The volumes are paired purely by their position in the sorted listings, so a missing file
        # would silently shift every following pair. Fail loudly instead.
        if len(ct_paths) != len(pet_paths):
            raise RuntimeError(
                f"Found {len(ct_paths)} CT volumes but {len(pet_paths)} PET volumes; cannot pair them."
            )
        raw_paths = list(zip(ct_paths, pet_paths))

    return raw_paths, label_paths


def get_autopet_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    modality: Optional[Literal["CT", "PET"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the AutoPET dataset for lesion segmentation in whole-body FDG-PET/CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        modality: The choice of imaging modality.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_autopet_paths(path, modality, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key="data",
        label_paths=label_paths,
        label_key="data",
        patch_shape=patch_shape,
        # Without an explicit modality the raw data is a (CT, PET) pair per sample.
        with_channels=modality is None,
        **kwargs
    )

    # When a sampler is passed, raise the number of sampling attempts on each sub-dataset.
    if "sampler" in kwargs:
        for ds in dataset.datasets:
            ds.max_sampling_attempts = 5000

    return dataset


def get_autopet_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    modality: Optional[Literal["CT", "PET"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the AutoPET dataloader for lesion segmentation in whole-body FDG-PET/CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        modality: The choice of imaging modality.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_autopet_dataset(path, patch_shape, modality, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
AUTOPET_DATA = 'http://193.196.20.155/data/autoPET/data/nifti.zip'
CHECKSUM = '0ac2186ea6d936ff41ce605c6a9588aeb20f031085589897dbab22fc82a12972'
def get_autopet_data(path: Union[os.PathLike, str], download: bool = False):
def get_autopet_data(path: Union[os.PathLike, str], download: bool = False):
    """Download and unpack the AutoPET dataset.

    If the folder `<path>/AutoPET-II` already exists, the data is treated as present and nothing is done.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the folder the data is extracted to (`<path>/AutoPET-II`).
    """
    target_dir = os.path.join(path, "AutoPET-II")
    if os.path.exists(target_dir):
        return target_dir

    # The archive is stored directly in `path`, so that folder has to exist before downloading.
    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "autopet.zip")
    print("The AutoPET data is not available yet and will be downloaded.")
    print("Note that this dataset is large, so this step can take several hours (depending on your internet).")
    util.download_source(path=zip_path, url=AUTOPET_DATA, download=download, checksum=CHECKSUM)

    # Create the target folder only after the archive is available. Its existence is what the early
    # return above tests, so creating it before a download that raises would make every later call
    # skip the download and continue without any data.
    os.makedirs(target_dir, exist_ok=True)
    util.unzip(zip_path, target_dir, remove=False)

    return target_dir
Download the AutoPET dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def get_autopet_paths(path: Union[os.PathLike, str], modality: Optional[Literal['CT', 'PET']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_autopet_paths(
    path: Union[os.PathLike, str], modality: Optional[Literal["CT", "PET"]] = None, download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the AutoPET data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        modality: The choice of imaging modality. Either "CT" or "PET";
            with `None` each image entry is a `(ct_path, pet_path)` tuple.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `modality` is not one of `None`, "CT" or "PET".
        RuntimeError: If both modalities are requested and the number of CT and PET volumes differs.
    """
    # Reject an unsupported modality before touching the data, the download step can take hours.
    if modality not in (None, "CT", "PET"):
        raise ValueError("Choose from the available modalities: `CT` / `PET`")

    get_autopet_data(path, download)

    root_dir = os.path.join(path, "AutoPET-II", "FDG-PET-CT-Lesions", "*", "*")
    ct_paths = sorted(glob(os.path.join(root_dir, "CTres.nii.gz")))
    pet_paths = sorted(glob(os.path.join(root_dir, "SUV.nii.gz")))
    label_paths = sorted(glob(os.path.join(root_dir, "SEG.nii.gz")))

    if modality == "CT":
        raw_paths = ct_paths
    elif modality == "PET":
        raw_paths = pet_paths
    else:
        # The volumes are paired purely by their position in the sorted listings, so a missing file
        # would silently shift every following pair. Fail loudly instead.
        if len(ct_paths) != len(pet_paths):
            raise RuntimeError(
                f"Found {len(ct_paths)} CT volumes but {len(pet_paths)} PET volumes; cannot pair them."
            )
        raw_paths = list(zip(ct_paths, pet_paths))

    return raw_paths, label_paths
Get paths to the AutoPET data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- modality: The choice of imaging modality.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_autopet_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], modality: Optional[Literal['CT', 'PET']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_autopet_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    modality: Optional[Literal["CT", "PET"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the AutoPET segmentation dataset for lesions in whole-body FDG-PET/CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        modality: The choice of imaging modality.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_autopet_paths(path, modality, download)

    if resize_inputs:
        # The resize helper hands back updated dataset kwargs and the patch shape to use afterwards.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    # Without an explicit modality the raw data is a (CT, PET) pair per sample.
    multi_modal = modality is None

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key="data",
        label_paths=gt_paths,
        label_key="data",
        patch_shape=patch_shape,
        with_channels=multi_modal,
        **kwargs
    )

    if "sampler" not in kwargs:
        return dataset

    # A sampler was passed: raise the number of sampling attempts on each sub-dataset.
    for sub_dataset in dataset.datasets:
        sub_dataset.max_sampling_attempts = 5000

    return dataset
Get the AutoPET dataset for lesion segmentation in whole-body FDG-PET/CT scans.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- modality: The choice of imaging modality.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_autopet_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], modality: Optional[Literal['CT', 'PET']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_autopet_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    modality: Optional[Literal["CT", "PET"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the AutoPET dataloader for lesion segmentation in whole-body FDG-PET/CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        modality: The choice of imaging modality.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Split the extra keyword arguments into those for the dataset factory and those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    autopet_dataset = get_autopet_dataset(
        path,
        patch_shape,
        modality,
        resize_inputs,
        download,
        **dataset_kwargs
    )

    loader = torch_em.get_data_loader(autopet_dataset, batch_size, **dataloader_kwargs)
    return loader
Get the AutoPET dataloader for lesion segmentation in whole-body FDG-PET/CT scans.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- modality: The choice of imaging modality.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.