torch_em.data.datasets.medical.spider

The SPIDER dataset contains annotations for segmentation of vertebrae, intervertebral discs and spinal canal in T1 and T2 MRI series.

This dataset is from the following publication:
- https://zenodo.org/records/10159290
- https://www.nature.com/articles/s41597-024-03090-w

Please cite it if you use this data in a publication.
  1"""The SPIDER dataset contains annotations for segmentation of vertebrae,
  2intervertebral discs and spinal canal in T1 and T2 MRI series.
  3
  4This dataset is from the following publication:
  5- https://zenodo.org/records/10159290
  6- https://www.nature.com/articles/s41597-024-03090-w
  7
  8Please cite it if you use this data in a publication.
  9"""
 10
 11import os
 12from glob import glob
 13from natsort import natsorted
 14from typing import Tuple, List, Union
 15
 16from torch.utils.data import Dataset, DataLoader
 17
 18import torch_em
 19
 20from .. import util
 21
 22
 23URL = {
 24    "images": "https://zenodo.org/records/10159290/files/images.zip?download=1",
 25    "masks": "https://zenodo.org/records/10159290/files/masks.zip?download=1"
 26}
 27
 28CHECKSUMS = {
 29    "images": "a54cba2905284ff6cc9999f1dd0e4d871c8487187db7cd4b068484eac2f50f17",
 30    "masks": "13a6e25a8c0d74f507e16ebb2edafc277ceeaf2598474f1fed24fdf59cb7f18f"
 31}
 32
 33
 34def get_spider_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 35    """Download the SPIDER dataset.
 36
 37    Args:
 38        path: Filepath to a folder where the data is downloaded for further processing.
 39        download: Whether to download the data if it is not present.
 40
 41    Returns:
 42        Filepath where the data is downloaded.
 43    """
 44    data_dir = os.path.join(path, "data")
 45    if os.path.exists(data_dir):
 46        return data_dir
 47
 48    os.makedirs(path, exist_ok=True)
 49
 50    zip_path = os.path.join(path, "images.zip")
 51    util.download_source(path=zip_path, url=URL["images"], download=download, checksum=CHECKSUMS["images"])
 52    util.unzip(zip_path=zip_path, dst=data_dir)
 53
 54    zip_path = os.path.join(path, "masks.zip")
 55    util.download_source(path=zip_path, url=URL["masks"], download=download, checksum=CHECKSUMS["masks"])
 56    util.unzip(zip_path=zip_path, dst=data_dir)
 57
 58    return data_dir
 59
 60
 61def get_spider_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
 62    """Get paths to the SPIDER data.
 63
 64    Args:
 65        path: Filepath to a folder where the data is downloaded for further processing.
 66        download: Whether to download the data if it is not present.
 67
 68    Returns:
 69        List of filepaths for the image data.
 70        List of filepaths for the label data.
 71    """
 72    data_dir = get_spider_data(path, download)
 73
 74    image_paths = natsorted(glob(os.path.join(data_dir, "images", "*.mha")))
 75    gt_paths = natsorted(glob(os.path.join(data_dir, "masks", "*.mha")))
 76
 77    return image_paths, gt_paths
 78
 79
 80def get_spider_dataset(
 81    path: Union[os.PathLike, str],
 82    patch_shape: Tuple[int, ...],
 83    resize_inputs: bool = False,
 84    download: bool = False,
 85    **kwargs
 86) -> Dataset:
 87    """Get the SPIDER dataset.
 88
 89    Args:
 90        path: Filepath to a folder where the data is downloaded for further processing.
 91        patch_shape: The patch shape to use for training.
 92        resize_inputs: Whether to resize inputs to the desired patch shape.
 93        download: Whether to download the data if it is not present.
 94        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 95
 96    Returns:
 97        The segmentation dataset.
 98    """
 99    # TODO: expose the choice to choose specific MRI modality, for now this works for our interests.
100    image_paths, gt_paths = get_spider_paths(path, download)
101
102    if resize_inputs:
103        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
104        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
105            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
106        )
107
108    return torch_em.default_segmentation_dataset(
109        raw_paths=image_paths,
110        raw_key=None,
111        label_paths=gt_paths,
112        label_key=None,
113        is_seg_dataset=True,
114        patch_shape=patch_shape,
115        **kwargs
116    )
117
118
def get_spider_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the SPIDER dataloader.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate dataset-construction kwargs from the ones consumed by the DataLoader.
    dataset_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_spider_dataset(
        path, patch_shape, resize_inputs=resize_inputs, download=download, **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL = {'images': 'https://zenodo.org/records/10159290/files/images.zip?download=1', 'masks': 'https://zenodo.org/records/10159290/files/masks.zip?download=1'}
CHECKSUMS = {'images': 'a54cba2905284ff6cc9999f1dd0e4d871c8487187db7cd4b068484eac2f50f17', 'masks': '13a6e25a8c0d74f507e16ebb2edafc277ceeaf2598474f1fed24fdf59cb7f18f'}
def get_spider_data(path: Union[os.PathLike, str], download: bool = False) -> str:
35def get_spider_data(path: Union[os.PathLike, str], download: bool = False) -> str:
36    """Download the SPIDER dataset.
37
38    Args:
39        path: Filepath to a folder where the data is downloaded for further processing.
40        download: Whether to download the data if it is not present.
41
42    Returns:
43        Filepath where the data is downloaded.
44    """
45    data_dir = os.path.join(path, "data")
46    if os.path.exists(data_dir):
47        return data_dir
48
49    os.makedirs(path, exist_ok=True)
50
51    zip_path = os.path.join(path, "images.zip")
52    util.download_source(path=zip_path, url=URL["images"], download=download, checksum=CHECKSUMS["images"])
53    util.unzip(zip_path=zip_path, dst=data_dir)
54
55    zip_path = os.path.join(path, "masks.zip")
56    util.download_source(path=zip_path, url=URL["masks"], download=download, checksum=CHECKSUMS["masks"])
57    util.unzip(zip_path=zip_path, dst=data_dir)
58
59    return data_dir

Download the SPIDER dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def get_spider_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
62def get_spider_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
63    """Get paths to the SPIDER data.
64
65    Args:
66        path: Filepath to a folder where the data is downloaded for further processing.
67        download: Whether to download the data if it is not present.
68
69    Returns:
70        List of filepaths for the image data.
71        List of filepaths for the label data.
72    """
73    data_dir = get_spider_data(path, download)
74
75    image_paths = natsorted(glob(os.path.join(data_dir, "images", "*.mha")))
76    gt_paths = natsorted(glob(os.path.join(data_dir, "masks", "*.mha")))
77
78    return image_paths, gt_paths

Get paths to the SPIDER data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_spider_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 81def get_spider_dataset(
 82    path: Union[os.PathLike, str],
 83    patch_shape: Tuple[int, ...],
 84    resize_inputs: bool = False,
 85    download: bool = False,
 86    **kwargs
 87) -> Dataset:
 88    """Get the SPIDER dataset.
 89
 90    Args:
 91        path: Filepath to a folder where the data is downloaded for further processing.
 92        patch_shape: The patch shape to use for training.
 93        resize_inputs: Whether to resize inputs to the desired patch shape.
 94        download: Whether to download the data if it is not present.
 95        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 96
 97    Returns:
 98        The segmentation dataset.
 99    """
100    # TODO: expose the choice to choose specific MRI modality, for now this works for our interests.
101    image_paths, gt_paths = get_spider_paths(path, download)
102
103    if resize_inputs:
104        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
105        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
106            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
107        )
108
109    return torch_em.default_segmentation_dataset(
110        raw_paths=image_paths,
111        raw_key=None,
112        label_paths=gt_paths,
113        label_key=None,
114        is_seg_dataset=True,
115        patch_shape=patch_shape,
116        **kwargs
117    )

Get the SPIDER dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • resize_inputs: Whether to resize inputs to the desired patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_spider_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
120def get_spider_loader(
121    path: Union[os.PathLike, str],
122    batch_size: int,
123    patch_shape: Tuple[int, ...],
124    resize_inputs: bool = False,
125    download: bool = False,
126    **kwargs
127) -> DataLoader:
128    """Get the SPIDER dataloader.
129
130    Args:
131        path: Filepath to a folder where the data is downloaded for further processing.
132        batch_size: The batch size for training.
133        patch_shape: The patch shape to use for training.
134        resize_inputs: Whether to resize inputs to the desired patch shape.
135        download: Whether to download the data if it is not present.
136        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
137
138    Returns:
139        The DataLoader.
140    """
141    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
142    dataset = get_spider_dataset(path, patch_shape, resize_inputs, download, **ds_kwargs)
143    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)

Get the SPIDER dataloader.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • resize_inputs: Whether to resize inputs to the desired patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.