torch_em.data.datasets.medical.verse

The VerSe dataset contains annotations for vertebrae segmentation in CT scans.

This dataset is from the publication https://doi.org/10.1016/j.media.2021.102166. Please cite it if you use this dataset for your research.
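
A minimal usage sketch is shown below; the target folder, patch shape and batch size are placeholder values and not fixed by the module.

from torch_em.data.datasets.medical.verse import get_verse_loader

# Hypothetical target folder and training settings; adjust them to your setup.
loader = get_verse_loader(
    path="./data/verse",         # the data is downloaded here on first use
    batch_size=2,
    patch_shape=(1, 256, 256),   # example patch shape for slice-wise sampling; pick one that fits your volumes
    split="train",
    download=True,
)

# Each batch is a pair of raw CT patches and vertebrae label patches.
for raw, labels in loader:
    print(raw.shape, labels.shape)
    break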

  1"""The VerSe dataset contains annotations for vertebrae segmentation in CT scans.
  2
  3This dataset is from the publication https://doi.org/10.1016/j.media.2021.102166.
  4Please cite it if you use this dataset for your research.
  5"""
  6
  7import os
  8from glob import glob
  9from natsort import natsorted
 10from typing import Union, Tuple, Literal
 11
 12from torch.utils.data import Dataset, DataLoader
 13
 14import torch_em
 15
 16from .. import util
 17
 18
 19URL = {
 20    "train": "https://files.de-1.osf.io/v1/resources/4skx2/providers/osfstorage/5ffa463786541a01e714d390/?zip=",
 21    "val": "https://files.de-1.osf.io/v1/resources/4skx2/providers/osfstorage/5ffa463686541a01eb15048c/?zip=",
 22    "test": "https://files.de-1.osf.io/v1/resources/4skx2/providers/osfstorage/5ffa4635ba010901f0891bd0/?zip="
 23}
 24
 25# FIXME the checksums are not reliable (same behaviour spotted in PlantSeg downloads from osf)
 26CHECKSUM = {
 27    "train": None,
 28    "val": None,
 29    "test": None,
 30}
 31
 32
 33def get_verse_data(
 34    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
 35) -> str:
 36    """Download the VerSe dataset.
 37
 38    Args:
 39        path: Filepath to a folder where the data is downloaded for further processing.
 40        split: The data split to use. Either 'train', 'val' or 'test'.
 41        download: Whether to download the data if it is not present.
 42
 43    Returns:
 44        Filepath where the data is downloaded.
 45    """
 46    assert split in ["train", "val", "test"], f"'{split}' is not a valid split."
 47
 48    data_dir = os.path.join(path, "data", split)
 49    if os.path.exists(data_dir):
 50        return data_dir
 51
 52    os.makedirs(path, exist_ok=True)
 53
 54    zip_path = os.path.join(path, f"verse2020_{split}.zip")
 55    util.download_source(path=zip_path, url=URL[split], download=download, checksum=CHECKSUM[split])
 56    util.unzip(zip_path=zip_path, dst=data_dir)
 57
 58    return data_dir
 59
 60
 61def get_verse_paths(
 62    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
 63) -> str:
 64    """Get paths to the VerSe data.
 65
 66    Args:
 67        path: Filepath to a folder where the data is downloaded for further processing.
 68        split: The data split to use. Either 'train', 'val' or 'test'.
 69        download: Whether to download the data if it is not present.
 70
 71    Returns:
 72        List of filepaths for the image data.
 73        List of filepaths for the label data.
 74    """
 75    data_dir = get_verse_data(path, split, download)
 76
 77    image_paths = natsorted(glob(os.path.join(data_dir, "rawdata", "*", "*_ct.nii.gz")))
 78    gt_paths = natsorted(glob(os.path.join(data_dir, "derivatives", "*", "*_msk.nii.gz")))
 79
 80    return image_paths, gt_paths
 81
 82
 83def get_verse_dataset(
 84    path: Union[os.PathLike, str],
 85    patch_shape: Tuple[int, ...],
 86    split: Literal['train', 'val', 'test'],
 87    resize_inputs: bool = False,
 88    download: bool = False,
 89    **kwargs
 90) -> Dataset:
 91    """Get the VerSe dataset for vertebrae segmentation.
 92
 93    Args:
 94        path: Filepath to a folder where the data is downloaded for further processing.
 95        patch_shape: The patch shape to use for training.
 96        split: The data split to use. Either 'train', 'val' or 'test'.
 97        resize_inputs: Whether to resize inputs to the desired patch shape.
 98        download: Whether to download the data if it is not present.
 99        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
100
101    Returns:
102        The segmentation dataset.
103    """
104    image_paths, gt_paths = get_verse_paths(path, split, download)
105
106    if resize_inputs:
107        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
108        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
109            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
110        )
111
112    return torch_em.default_segmentation_dataset(
113        raw_paths=image_paths, raw_key="data", label_paths=gt_paths, label_key="data", patch_shape=patch_shape, **kwargs
114    )
115
116
117def get_verse_loader(
118    path: Union[os.PathLike, str],
119    batch_size: int,
120    patch_shape: Tuple[int, ...],
121    split: Literal['train', 'val', 'test'],
122    resize_inputs: bool = False,
123    download: bool = False,
124    **kwargs
125) -> DataLoader:
126    """Get the VerSe dataloader for vertebrae segmentation.
127
128    Args:
129        path: Filepath to a folder where the data is downloaded for further processing.
130        batch_size: The batch size for training.
131        patch_shape: The patch shape to use for training.
132        split: The data split to use. Either 'train', 'val' or 'test'.
133        resize_inputs: Whether to resize inputs to the desired patch shape.
134        download: Whether to download the data if it is not present.
135        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
136
137    Returns:
138        The DataLoader.
139    """
140    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
141    dataset = get_verse_dataset(path, patch_shape, split, resize_inputs, download, **ds_kwargs)
142    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL = {
    'train': 'https://files.de-1.osf.io/v1/resources/4skx2/providers/osfstorage/5ffa463786541a01e714d390/?zip=',
    'val': 'https://files.de-1.osf.io/v1/resources/4skx2/providers/osfstorage/5ffa463686541a01eb15048c/?zip=',
    'test': 'https://files.de-1.osf.io/v1/resources/4skx2/providers/osfstorage/5ffa4635ba010901f0891bd0/?zip=',
}
CHECKSUM = {'train': None, 'val': None, 'test': None}
def get_verse_data(path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> str:

Download the VerSe dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.
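
For reference, a small sketch of calling this function directly; the target folder below is a placeholder.

from torch_em.data.datasets.medical.verse import get_verse_data

# Fetches and unpacks the zip for the chosen split on first call; later calls reuse the folder.
data_dir = get_verse_data(path="./data/verse", split="train", download=True)
print(data_dir)  # <path>/data/train, which the path helpers expect to contain 'rawdata' and 'derivatives'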

def get_verse_paths(path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]:

Get paths to the VerSe data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data.
List of filepaths for the label data.
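
A short sketch of inspecting the returned paths (placeholder folder); the images and masks are paired by the natural sorting of the two globs above.

from torch_em.data.datasets.medical.verse import get_verse_paths

# The split is downloaded on demand when download=True.
image_paths, gt_paths = get_verse_paths(path="./data/verse", split="val", download=True)
print(len(image_paths), "CT volumes,", len(gt_paths), "vertebrae masks")
print(image_paths[0], gt_paths[0])  # e.g. a *_ct.nii.gz image and its *_msk.nii.gz mask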

def get_verse_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> torch.utils.data.dataset.Dataset:

Get the VerSe dataset for vertebrae segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • resize_inputs: Whether to resize inputs to the desired patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.
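
A sketch of constructing the dataset directly; folder and patch shape are placeholders, and with resize_inputs=True the inputs would instead be resized towards the patch shape as described above.

from torch_em.data.datasets.medical.verse import get_verse_dataset

dataset = get_verse_dataset(
    path="./data/verse",
    patch_shape=(16, 128, 128),   # example 3d patch shape; pick one that fits the CT volumes
    split="train",
    download=True,
)
raw, labels = dataset[0]          # a raw CT patch and the matching vertebrae label patch
print(len(dataset), raw.shape, labels.shape)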

def get_verse_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> torch.utils.data.dataloader.DataLoader:

Get the VerSe dataloader for vertebrae segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • resize_inputs: Whether to resize inputs to the desired patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.
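
Because the extra keyword arguments are split between the dataset and the PyTorch DataLoader, both kinds can be passed in one call. A sketch under that assumption (paths and values are placeholders; n_samples is one of the usual torch_em dataset keyword arguments):

from torch_em.data.datasets.medical.verse import get_verse_loader

loader = get_verse_loader(
    path="./data/verse",
    batch_size=2,
    patch_shape=(16, 128, 128),
    split="train",
    download=True,
    n_samples=100,     # forwarded to torch_em.default_segmentation_dataset
    shuffle=True,      # forwarded to torch.utils.data.DataLoader
    num_workers=4,     # forwarded to torch.utils.data.DataLoader
)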