torch_em.data.datasets.medical.mice_tumseg

The Mice TumSeg dataset contains annotations for tumor segmentation in micro-CT scans.

This dataset is from the publication https://doi.org/10.1038/s41597-024-03814-y. Please cite it if you use this dataset for your research.

  1"""The Mice TumSeg contains annotations for tumor segmentation in micro-CT scans.
  2
  3This dataset is from the publication https://doi.org/10.1038/s41597-024-03814-y.
  4Please cite it if you use this dataset for your research.
  5"""
  6
  7import os
  8from glob import glob
  9from natsort import natsorted
 10from typing import Union, Tuple, Literal, List
 11
 12from torch.utils.data import Dataset, DataLoader
 13
 14import torch_em
 15
 16from .. import util
 17
 18
 19URL = "https://erda.ku.dk/archives/ba4fcd9bfa0fb581d593297dd43d1fd1/TumSeg%20database.zip"
 20CHECKSUM = "1c0567358ec81b9e085434a0362c1c2981b791dc2733931aeb022174ecb80399"
 21
 22
 23def get_mice_tumseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 24    """Download the Mice TumSeg dataset.
 25
 26    Args:
 27        path: Filepath to a folder where the data is downloaded for further processing.
 28        download: Whether to download the data if it is not present.
 29
 30    Returns:
 31        Filepath where the data is downloaded.
 32    """
 33    data_dir = os.path.join(path, r"TumSeg database")
 34    if os.path.exists(data_dir):
 35        return data_dir
 36
 37    os.makedirs(path, exist_ok=True)
 38
 39    zip_path = os.path.join(path, "TumSeg_database.zip")
 40    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 41    util.unzip(zip_path=zip_path, dst=path)
 42    return data_dir
 43
 44
 45def get_mice_tumseg_paths(
 46    path: Union[os.PathLike, str],
 47    split: Literal['train', 'val', 'test'],
 48    rater: Literal["A", "B", "C", "STAPLE"] = "A",
 49    download: bool = False
 50) -> Tuple[List[str], List[str]]:
 51    """Get paths to the Mice TumSeg data.
 52
 53    Args:
 54        path: Filepath to a folder where the data is downloaded for further processing.
 55        split: The choice of data split.
 56        rater: The choice of annotator.
 57        download: Whether to download the data if it is not present.
 58
 59    Returns:
 60        List of filepaths for the image data.
 61        List of filepaths for the label data.
 62    """
 63    data_dir = get_mice_tumseg_data(path, download)
 64
 65    if rater in ["A", "B", "C"]:
 66        ann_choice = f"Annotator_{rater}"
 67    elif rater == "STAPLE":
 68        ann_choice = rater
 69    else:
 70        raise ValueError(f"'{rater}' is not a valid rater choice.")
 71
 72    raw_paths = natsorted(glob(os.path.join(data_dir, "Dataset*", "**", "CT*.nii.gz"), recursive=True))
 73    label_paths = natsorted(glob(os.path.join(data_dir, "Dataset*", "**", f"{ann_choice}*.nii.gz"), recursive=True))
 74
 75    if split == "train":
 76        raw_paths, label_paths = raw_paths[:325], label_paths[:325]
 77    elif split == "val":
 78        raw_paths, label_paths = raw_paths[325:360], label_paths[325:360]
 79    elif split == "test":
 80        raw_paths, label_paths = raw_paths[360:], label_paths[360:]
 81    else:
 82        raise ValueError(f"'{split}' is not a valid split.")
 83
 84    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0
 85
 86    return raw_paths, label_paths
 87
 88
 89def get_mice_tumseg_dataset(
 90    path: Union[os.PathLike, str],
 91    patch_shape: Tuple[int, ...],
 92    split: Literal['train', 'val', 'test'],
 93    rater: Literal["A", "B", "C", "STAPLE"] = "A",
 94    resize_inputs: bool = False,
 95    download: bool = False,
 96    **kwargs
 97) -> Dataset:
 98    """Get the Mice TumSeg dataset for tumor segmentation.
 99
100    Args:
101        path: Filepath to a folder where the data is downloaded for further processing.
102        patch_shape: The patch shape to use for training.
103        split: The choice of data split.
104        rater: The choice of annotator.
105        download: Whether to download the data if it is not present.
106        resize_inputs: Whether to resize the inputs.
107        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
108
109    Returns:
110        The segmentation dataset.
111    """
112    raw_paths, label_paths = get_mice_tumseg_paths(path, split, rater, download)
113
114    if resize_inputs:
115        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
116        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
117            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
118        )
119
120    return torch_em.default_segmentation_dataset(
121        raw_paths=raw_paths,
122        raw_key="data",
123        label_paths=label_paths,
124        label_key="data",
125        patch_shape=patch_shape,
126        **kwargs
127    )
128
129
def get_mice_tumseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    rater: Literal["A", "B", "C", "STAPLE"] = "A",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the Mice TumSeg dataloader for tumor segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        rater: The choice of annotator.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_mice_tumseg_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        rater=rater,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)
# Download location of the zipped "TumSeg database" archive.
URL = 'https://erda.ku.dk/archives/ba4fcd9bfa0fb581d593297dd43d1fd1/TumSeg%20database.zip'
# Expected checksum of the archive; passed to `util.download_source` together with URL
# (64 hex digits, presumably SHA-256 — confirm).
CHECKSUM = '1c0567358ec81b9e085434a0362c1c2981b791dc2733931aeb022174ecb80399'
def get_mice_tumseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_mice_tumseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the Mice TumSeg data is available locally and return its location.

    Args:
        path: Filepath to the folder that holds (or will hold) the dataset.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the "TumSeg database" folder inside `path`.
    """
    extracted_dir = os.path.join(path, "TumSeg database")

    # Only fetch and unpack the archive when the extracted folder is missing.
    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)

        archive = os.path.join(path, "TumSeg_database.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return extracted_dir

Download the Mice TumSeg dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def get_mice_tumseg_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], rater: Literal['A', 'B', 'C', 'STAPLE'] = 'A', download: bool = False) -> Tuple[List[str], List[str]]:
def get_mice_tumseg_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'val', 'test'],
    rater: Literal["A", "B", "C", "STAPLE"] = "A",
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the Mice TumSeg data.

    The images ("CT*.nii.gz") and the labels of the chosen rater are collected recursively
    from the "Dataset*" folders and naturally sorted. The split is a fixed slice of these
    sorted lists: the first 325 entries are 'train', the next 35 are 'val' and the
    remainder is 'test'.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split. Either 'train', 'val' or 'test'.
        rater: The choice of annotator. Either 'A', 'B', 'C' or 'STAPLE'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `rater` or `split` is not one of the supported choices.
        AssertionError: If the selected split is empty or the number of images
            and labels in it differ.
    """
    # Validate the arguments before touching the data, so that a typo fails immediately
    # instead of only after the dataset has been downloaded and extracted.
    if rater in ["A", "B", "C"]:
        ann_choice = f"Annotator_{rater}"
    elif rater == "STAPLE":
        ann_choice = rater
    else:
        raise ValueError(f"'{rater}' is not a valid rater choice.")

    if split == "train":
        selection = slice(None, 325)
    elif split == "val":
        selection = slice(325, 360)
    elif split == "test":
        selection = slice(360, None)
    else:
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_mice_tumseg_data(path, download)

    # NOTE(review): images and labels are paired purely by their position in the sorted lists.
    raw_paths = natsorted(glob(os.path.join(data_dir, "Dataset*", "**", "CT*.nii.gz"), recursive=True))
    label_paths = natsorted(glob(os.path.join(data_dir, "Dataset*", "**", f"{ann_choice}*.nii.gz"), recursive=True))

    raw_paths, label_paths = raw_paths[selection], label_paths[selection]

    # Raised explicitly (rather than via `assert`) so the check is not stripped under
    # `python -O`, while keeping the exception type of the previous implementation.
    if len(raw_paths) != len(label_paths) or len(raw_paths) == 0:
        raise AssertionError(
            f"Found {len(raw_paths)} images and {len(label_paths)} labels for the '{split}' split "
            f"(rater '{rater}') in '{data_dir}'."
        )

    return raw_paths, label_paths

Get paths to the Mice TumSeg data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • split: The choice of data split.
  • rater: The choice of annotator.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_mice_tumseg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], rater: Literal['A', 'B', 'C', 'STAPLE'] = 'A', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 90def get_mice_tumseg_dataset(
 91    path: Union[os.PathLike, str],
 92    patch_shape: Tuple[int, ...],
 93    split: Literal['train', 'val', 'test'],
 94    rater: Literal["A", "B", "C", "STAPLE"] = "A",
 95    resize_inputs: bool = False,
 96    download: bool = False,
 97    **kwargs
 98) -> Dataset:
 99    """Get the Mice TumSeg dataset for tumor segmentation.
100
101    Args:
102        path: Filepath to a folder where the data is downloaded for further processing.
103        patch_shape: The patch shape to use for training.
104        split: The choice of data split.
105        rater: The choice of annotator.
106        download: Whether to download the data if it is not present.
107        resize_inputs: Whether to resize the inputs.
108        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
109
110    Returns:
111        The segmentation dataset.
112    """
113    raw_paths, label_paths = get_mice_tumseg_paths(path, split, rater, download)
114
115    if resize_inputs:
116        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
117        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
118            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
119        )
120
121    return torch_em.default_segmentation_dataset(
122        raw_paths=raw_paths,
123        raw_key="data",
124        label_paths=label_paths,
125        label_key="data",
126        patch_shape=patch_shape,
127        **kwargs
128    )

Get the Mice TumSeg dataset for tumor segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • split: The choice of data split.
  • rater: The choice of annotator.
  • download: Whether to download the data if it is not present.
  • resize_inputs: Whether to resize the inputs.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_mice_tumseg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], rater: Literal['A', 'B', 'C', 'STAPLE'] = 'A', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_mice_tumseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    rater: Literal["A", "B", "C", "STAPLE"] = "A",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the Mice TumSeg dataloader for tumor segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        rater: The choice of annotator.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_mice_tumseg_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        rater=rater,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)

Get the Mice TumSeg dataloader for tumor segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The choice of data split.
  • rater: The choice of annotator.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.