torch_em.data.datasets.medical.motum
The MOTUM dataset contains annotations for tumor (brain metastases and high grade glioma) segmentation in brain multi-modal MRI scans.
The dataset is located at https://doi.gin.g-node.org/10.12751/g-node.tvzqc5/. This dataset is from the publication https://doi.org/10.1038/s41597-024-03634-0. Please cite it if you use this dataset for your research.
"""The MOTUM dataset contains annotations for tumor (brain metastases and high grade glioma) segmentation
in brain multi-modal MRI scans.

The dataset is located at https://doi.gin.g-node.org/10.12751/g-node.tvzqc5/.
This dataset is from the publication https://doi.org/10.1038/s41597-024-03634-0.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://doi.gin.g-node.org/10.12751/g-node.tvzqc5/10.12751_g-node.tvzqc5.zip"
CHECKSUM = "2626862599a3fcfe4ac0cefcea3af5b190625275036cc8eb4c9039cbd54e2d7c"


def get_motum_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the MOTUM dataset.

    The archive is unzipped with `path` as destination, so the returned data folder
    is `path` itself (with a trailing separator).

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    data_dir = os.path.join(path, "")

    # The data folder is `path` itself, so its mere existence does not prove that the
    # archive was unpacked (e.g. the caller may have created an empty folder upfront).
    # Only skip the download when subject folders ("sub-*") are actually present.
    if glob(os.path.join(data_dir, "sub-*")):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "data.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def get_motum_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'val', 'test'],
    modality: Literal['flair', 't1ce'],
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the MOTUM data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        modality: The choice of imaging modality.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `modality` or `split` is not one of the supported choices.
    """
    # Validate the choices first, so that a typo fails before any download is triggered.
    if modality not in ("flair", "t1ce"):
        raise ValueError(f"'{modality}' is not a valid modality.")

    # The splits are defined by position in the naturally sorted file lists.
    if split == "train":
        selection = slice(None, 35)
    elif split == "val":
        selection = slice(35, 45)
    elif split == "test":
        selection = slice(45, None)
    else:
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_motum_data(path, download)

    raw_paths = natsorted(glob(os.path.join(data_dir, "sub-*", "anat", f"sub-*_{modality}.nii.gz")))
    label_paths = natsorted(glob(os.path.join(data_dir, "derivatives", "sub-*", f"{modality}_seg_*.nii.gz")))

    # NOTE: Remove labels which are missing preprocessed volumes
    missing_inputs = ("sub-0030", "sub-0031", "sub-0032")
    label_paths = [p for p in label_paths if not any(name in p for name in missing_inputs)]

    raw_paths, label_paths = raw_paths[selection], label_paths[selection]

    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0, (
        f"Found {len(raw_paths)} image(s) and {len(label_paths)} label(s) "
        f"for split '{split}' and modality '{modality}' in '{data_dir}'."
    )

    return raw_paths, label_paths


def get_motum_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    modality: Literal['flair', 't1ce'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the MOTUM dataset for tumor segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        modality: The choice of imaging modality.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_motum_paths(path, split, modality, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key="data",
        label_paths=label_paths,
        label_key="data",
        is_seg_dataset=True,
        patch_shape=patch_shape,
        **kwargs
    )


def get_motum_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    modality: Literal['flair', 't1ce'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the MOTUM dataloader for tumor segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        modality: The choice of imaging modality.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_motum_dataset(path, patch_shape, split, modality, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://doi.gin.g-node.org/10.12751/g-node.tvzqc5/10.12751_g-node.tvzqc5.zip'
CHECKSUM =
'2626862599a3fcfe4ac0cefcea3af5b190625275036cc8eb4c9039cbd54e2d7c'
def
get_motum_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_motum_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the MOTUM dataset.

    The archive is unzipped with `path` as destination, so the returned data folder
    is `path` itself (with a trailing separator).

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    data_dir = os.path.join(path, "")

    # The data folder is `path` itself, so its mere existence does not prove that the
    # archive was unpacked (e.g. the caller may have created an empty folder upfront).
    # Only skip the download when subject folders ("sub-*") are actually present.
    if glob(os.path.join(data_dir, "sub-*")):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "data.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir
Download the MOTUM dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def
get_motum_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], modality: Literal['flair', 't1ce'], download: bool = False) -> Tuple[List[int], List[int]]:
def get_motum_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'val', 'test'],
    modality: Literal['flair', 't1ce'],
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the MOTUM data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        modality: The choice of imaging modality.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `modality` or `split` is not one of the supported choices.
    """
    # Validate the choices first, so that a typo fails before any download is triggered.
    if modality not in ("flair", "t1ce"):
        raise ValueError(f"'{modality}' is not a valid modality.")

    # The splits are defined by position in the naturally sorted file lists.
    if split == "train":
        selection = slice(None, 35)
    elif split == "val":
        selection = slice(35, 45)
    elif split == "test":
        selection = slice(45, None)
    else:
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_motum_data(path, download)

    raw_paths = natsorted(glob(os.path.join(data_dir, "sub-*", "anat", f"sub-*_{modality}.nii.gz")))
    label_paths = natsorted(glob(os.path.join(data_dir, "derivatives", "sub-*", f"{modality}_seg_*.nii.gz")))

    # NOTE: Remove labels which are missing preprocessed volumes
    missing_inputs = ("sub-0030", "sub-0031", "sub-0032")
    label_paths = [p for p in label_paths if not any(name in p for name in missing_inputs)]

    raw_paths, label_paths = raw_paths[selection], label_paths[selection]

    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0, (
        f"Found {len(raw_paths)} image(s) and {len(label_paths)} label(s) "
        f"for split '{split}' and modality '{modality}' in '{data_dir}'."
    )

    return raw_paths, label_paths
Get paths to the MOTUM data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The choice of data split.
- modality: The choice of imaging modality.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_motum_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], modality: Literal['flair', 't1ce'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_motum_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    modality: Literal['flair', 't1ce'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the MOTUM segmentation dataset for one split and imaging modality.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        modality: The choice of imaging modality.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_files, mask_files = get_motum_paths(path, split, modality, download)

    if resize_inputs:
        # The helper hands back both the updated keyword arguments and the patch shape to use.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_files,
        raw_key="data",
        label_paths=mask_files,
        label_key="data",
        is_seg_dataset=True,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
Get the MOTUM dataset for tumor segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- modality: The choice of imaging modality.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_motum_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], modality: Literal['flair', 't1ce'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_motum_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    modality: Literal['flair', 't1ce'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the MOTUM dataloader for tumor segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        modality: The choice of imaging modality.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    motum_dataset = get_motum_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        modality=modality,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(motum_dataset, batch_size, **dataloader_kwargs)
Get the MOTUM dataloader for tumor segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- modality: The choice of imaging modality.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.