torch_em.data.datasets.medical.motum

The MOTUM dataset contains annotations for tumor (brain metastases and high grade glioma) segmentation in brain multi-modal MRI scans.

The dataset is located at https://doi.gin.g-node.org/10.12751/g-node.tvzqc5/. This dataset is from the publication https://doi.org/10.1038/s41597-024-03634-0. Please cite it if you use this dataset for your research.

View Source

  1"""The MOTUM dataset contains annotations for tumor (brain metastases and high grade glioma) segmentation
  2in brain multi-modal MRI scans.
  3
  4The dataset is located at https://doi.gin.g-node.org/10.12751/g-node.tvzqc5/.
  5This dataset is from the publication https://doi.org/10.1038/s41597-024-03634-0.
  6Please cite it if you use this dataset for your research.
  7"""
  8
  9import os
 10from glob import glob
 11from natsort import natsorted
 12from typing import Union, Tuple, Literal, List
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
# Download location of the zipped MOTUM dataset; passed as `url` to `util.download_source`.
URL = "https://doi.gin.g-node.org/10.12751/g-node.tvzqc5/10.12751_g-node.tvzqc5.zip"
# Expected checksum of the archive; passed as `checksum` to `util.download_source`.
# NOTE(review): the 64 hex digits suggest SHA-256 - confirm against `util.download_source`.
CHECKSUM = "2626862599a3fcfe4ac0cefcea3af5b190625275036cc8eb4c9039cbd54e2d7c"
 23
 24
 25def get_motum_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 26    """Download the MOTUM dataset.
 27
 28    Args:
 29        path: Filepath to a folder where the data is downloaded for further processing.
 30        download: Whether to download the data if it is not present.
 31
 32    Returns:
 33        Filepath where the data is downloaded.
 34    """
 35    data_dir = os.path.join(path, "")
 36    if os.path.exists(data_dir):
 37        return data_dir
 38
 39    os.makedirs(path, exist_ok=True)
 40
 41    zip_path = os.path.join(path, "data.zip")
 42    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 43    util.unzip(zip_path=zip_path, dst=path)
 44
 45    return data_dir
 46
 47
 48def get_motum_paths(
 49    path: Union[os.PathLike, str],
 50    split: Literal['train', 'val', 'test'],
 51    modality: Literal['flair', 't1ce'],
 52    download: bool = False
 53) -> Tuple[List[int], List[int]]:
 54    """Get paths to the MOTUM data.
 55
 56    Args:
 57        path: Filepath to a folder where the data is downloaded for further processing.
 58        split: The choice of data split.
 59        modality: The choice of imaging modality.
 60        download: Whether to download the data if it is not present.
 61
 62    Returns:
 63        List of filepath for the image data.
 64        List of filepaths for the label data.
 65    """
 66    data_dir = get_motum_data(path, download)
 67
 68    if modality not in ["flair", "t1ce"]:
 69        raise ValueError(f"'{modality}' is not a valid modality.")
 70
 71    raw_paths = natsorted(glob(os.path.join(data_dir, "sub-*", "anat", f"sub-*_{modality}.nii.gz")))
 72    label_paths = natsorted(glob(os.path.join(data_dir, "derivatives", "sub-*", f"{modality}_seg_*.nii.gz")))
 73
 74    # NOTE: Remove labels which are missing preprocessed volumes
 75    missing_inputs = ["sub-0030", "sub-0031", "sub-0032"]
 76    label_paths = [p for p in label_paths if all([p.find(_f) == -1 for _f in missing_inputs])]
 77
 78    if split == "train":
 79        raw_paths, label_paths = raw_paths[:35], label_paths[:35]
 80    elif split == "val":
 81        raw_paths, label_paths = raw_paths[35:45], label_paths[35:45]
 82    elif split == "test":
 83        raw_paths, label_paths = raw_paths[45:], label_paths[45:]
 84    else:
 85        raise ValueError(f"'{split}' is not a valid split.")
 86
 87    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0
 88
 89    return raw_paths, label_paths
 90
 91
 92def get_motum_dataset(
 93    path: Union[os.PathLike, str],
 94    patch_shape: Tuple[int, ...],
 95    split: Literal['train', 'val', 'test'],
 96    modality: Literal['flair', 't1ce'],
 97    resize_inputs: bool = False,
 98    download: bool = False,
 99    **kwargs
100) -> Dataset:
101    """Get the MOTUM dataset for tumor segmentation.
102
103    Args:
104        path: Filepath to a folder where the data is downloaded for further processing.
105        patch_shape: The patch shape to use for training.
106        split: The choice of data split.
107        modality: The choice of imaging modality.
108        resize_inputs: Whether to resize inputs to the desired patch shape.
109        download: Whether to download the data if it is not present.
110        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
111
112    Returns:
113        The segmentation dataset.
114    """
115    raw_paths, label_paths = get_motum_paths(path, split, modality, download)
116
117    if resize_inputs:
118        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
119        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
120            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
121        )
122
123    return torch_em.default_segmentation_dataset(
124        raw_paths=raw_paths,
125        raw_key="data",
126        label_paths=label_paths,
127        label_key="data",
128        is_seg_dataset=True,
129        patch_shape=patch_shape,
130        **kwargs
131    )
132
133
def get_motum_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    modality: Literal['flair', 't1ce'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the MOTUM dataloader for tumor segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        modality: The choice of imaging modality.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_motum_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        modality=modality,
        resize_inputs=resize_inputs,
        download=download,
        **ds_kwargs
    )

    loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
    return loader

URL = 'https://doi.gin.g-node.org/10.12751/g-node.tvzqc5/10.12751_g-node.tvzqc5.zip'

CHECKSUM = '2626862599a3fcfe4ac0cefcea3af5b190625275036cc8eb4c9039cbd54e2d7c'

def get_motum_data(path: Union[os.PathLike, str], download: bool = False) -> str: View Source

def get_motum_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the MOTUM dataset.

    The archive is unpacked directly into `path`. The download is skipped when the
    extracted data is already present there.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    # The trailing separator is kept so that the returned value stays the same for callers.
    data_dir = os.path.join(path, "")

    # Testing only whether `path` exists would treat any pre-created (possibly empty) folder
    # as "already downloaded". Look for the extracted "derivatives" folder instead, which
    # `get_motum_paths` expects right below the data directory.
    if os.path.isdir(os.path.join(data_dir, "derivatives")):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "data.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir

Download the MOTUM dataset.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
download: Whether to download the data if it is not present.

Returns:

Filepath where the data is downloaded.

def get_motum_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], modality: Literal['flair', 't1ce'], download: bool = False) -> Tuple[List[int], List[int]]: View Source

def get_motum_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'val', 'test'],
    modality: Literal['flair', 't1ce'],
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the MOTUM data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        modality: The choice of imaging modality.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `modality` or `split` is not one of the supported choices.
    """
    # Validate the arguments before touching the data, so that a typo does not trigger a download.
    if modality not in ["flair", "t1ce"]:
        raise ValueError(f"'{modality}' is not a valid modality.")

    # The splits are fixed index ranges over the naturally sorted volumes.
    split_slices = {"train": slice(None, 35), "val": slice(35, 45), "test": slice(45, None)}
    if split not in tuple(split_slices):
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_motum_data(path, download)

    raw_paths = natsorted(glob(os.path.join(data_dir, "sub-*", "anat", f"sub-*_{modality}.nii.gz")))
    label_paths = natsorted(glob(os.path.join(data_dir, "derivatives", "sub-*", f"{modality}_seg_*.nii.gz")))

    # NOTE: Remove labels which are missing preprocessed volumes
    missing_inputs = ["sub-0030", "sub-0031", "sub-0032"]
    label_paths = [p for p in label_paths if not any(_f in p for _f in missing_inputs)]

    selection = split_slices[split]
    raw_paths, label_paths = raw_paths[selection], label_paths[selection]

    # Every selected image needs a matching label, and the split must not be empty.
    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0

    return raw_paths, label_paths

Get paths to the MOTUM data.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
split: The choice of data split.
modality: The choice of imaging modality.
download: Whether to download the data if it is not present.

Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_motum_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], modality: Literal['flair', 't1ce'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

 93def get_motum_dataset(
 94    path: Union[os.PathLike, str],
 95    patch_shape: Tuple[int, ...],
 96    split: Literal['train', 'val', 'test'],
 97    modality: Literal['flair', 't1ce'],
 98    resize_inputs: bool = False,
 99    download: bool = False,
100    **kwargs
101) -> Dataset:
102    """Get the MOTUM dataset for tumor segmentation.
103
104    Args:
105        path: Filepath to a folder where the data is downloaded for further processing.
106        patch_shape: The patch shape to use for training.
107        split: The choice of data split.
108        modality: The choice of imaging modality.
109        resize_inputs: Whether to resize inputs to the desired patch shape.
110        download: Whether to download the data if it is not present.
111        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
112
113    Returns:
114        The segmentation dataset.
115    """
116    raw_paths, label_paths = get_motum_paths(path, split, modality, download)
117
118    if resize_inputs:
119        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
120        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
121            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
122        )
123
124    return torch_em.default_segmentation_dataset(
125        raw_paths=raw_paths,
126        raw_key="data",
127        label_paths=label_paths,
128        label_key="data",
129        is_seg_dataset=True,
130        patch_shape=patch_shape,
131        **kwargs
132    )

Get the MOTUM dataset for tumor segmentation.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
patch_shape: The patch shape to use for training.
split: The choice of data split.
modality: The choice of imaging modality.
resize_inputs: Whether to resize inputs to the desired patch shape.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_motum_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], modality: Literal['flair', 't1ce'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_motum_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    modality: Literal['flair', 't1ce'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the MOTUM dataloader for tumor segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        modality: The choice of imaging modality.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_motum_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        modality=modality,
        resize_inputs=resize_inputs,
        download=download,
        **ds_kwargs
    )

    loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
    return loader

Get the MOTUM dataloader for tumor segmentation.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
batch_size: The batch size for training.
patch_shape: The patch shape to use for training.
split: The choice of data split.
modality: The choice of imaging modality.
resize_inputs: Whether to resize inputs to the desired patch shape.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.