torch_em.data.datasets.medical.oasis

The OASIS dataset contains two sets of annotations for brain T1 MRI: one for segmentation of 4 tissue types and one for segmentation of 35 anatomical structures.

The dataset comes from https://github.com/adalca/medical-datasets/blob/master/neurite-oasis.md.

This dataset is from the following publications:

- https://doi.org/10.59275/j.melba.2022-74f1
- https://doi.org/10.1162/jocn.2007.19.9.1498

Please cite them if you use this dataset for your research.

  1"""The OASIS dataset contains two sets of annotations for brain T1 MRI:
  2one for segmentation of 4 tissue types and one for segmentation of 35 anatomical structures.
  3
  4The dataset comes from https://github.com/adalca/medical-datasets/blob/master/neurite-oasis.md.
  5
  6This dataset is from the following publications:
  7- https://doi.org/10.59275/j.melba.2022-74f1
  8- https://doi.org/10.1162/jocn.2007.19.9.1498
  9
 10Please cite them if you use this dataset for your research.
 11"""
 12
 13import os
 14from glob import glob
 15from typing import Union, Tuple, Literal, List
 16
 17from torch.utils.data import Dataset, DataLoader
 18
 19import torch_em
 20
 21from .. import util
 22
 23
# Location of the neurite-OASIS v1.0 tar archive that `get_oasis_data` downloads.
URL = "https://surfer.nmr.mgh.harvard.edu/ftp/data/neurite/data/neurite-oasis.v1.0.tar"
# Checksum handed to `util.download_source` for that archive
# (64 hex digits, presumably SHA-256 -- TODO confirm against `util.download_source`).
CHECKSUM = "86dd117dda17f736ade8a4088d7e98e066e1181950fe8b406f1a35f7fb743e78"
 26
 27
 28def get_oasis_data(path: Union[os.PathLike, str], download: bool = False):
 29    """Download the OASIS dataset.
 30
 31    Args:
 32        path: Filepath to a folder where the data is downloaded for further processing.
 33        download: Whether to download the data if it is not present.
 34    """
 35    data_path = os.path.join(path, "data")
 36    if os.path.exists(data_path):
 37        return
 38
 39    os.makedirs(path, exist_ok=True)
 40    tar_path = os.path.join(path, "neurite-oasis.v1.0.tar")
 41    util.download_source(path=tar_path, url=URL, download=download, checksum=CHECKSUM)
 42    util.unzip_tarfile(tar_path=tar_path, dst=data_path, remove=False)
 43
 44
 45def get_oasis_paths(
 46    path: Union[os.PathLike, str],
 47    split: Literal['train', 'val', 'test'],
 48    source: Literal['orig', 'norm'] = "orig",
 49    label_annotations: Literal['4', '35'] = "4",
 50    download: bool = False
 51) -> Tuple[List[str], List[str]]:
 52    """Get paths to the OASIS data.
 53
 54    Args:
 55        path: Filepath to a folder where the data is downloaded for further processing.
 56        split: The choice of data split.
 57        source: The source of inputs. Either 'orig' (original brain scans) or 'norm' (skull stripped).
 58        label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy).
 59        download: Whether to download the data if it is not present.
 60
 61    Returns:
 62        List of filepaths for the image data.
 63        List of filepaths for the label data.
 64    """
 65    get_oasis_data(path, download)
 66
 67    patient_dirs = glob(os.path.join(path, "data", "OASIS_*"))
 68    raw_paths, label_paths = [], []
 69    for pdir in patient_dirs:
 70        raw_paths.append(os.path.join(pdir, f"{source}.nii.gz"))
 71        label_paths.append(os.path.join(pdir, f"seg{label_annotations}.nii.gz"))
 72
 73    if split == "train":
 74        raw_paths, label_paths = raw_paths[:350], label_paths[:350]
 75    elif split == "val":
 76        raw_paths, label_paths = raw_paths[350:375], label_paths[350:375]
 77    elif split == "test":
 78        raw_paths, label_paths = raw_paths[375:], label_paths[375:]
 79    else:
 80        raise ValueError(f"'{split}' is not a valid split.")
 81
 82    assert len(raw_paths) == len(label_paths)
 83
 84    return raw_paths, label_paths
 85
 86
 87def get_oasis_dataset(
 88    path: Union[os.PathLike, str],
 89    patch_shape: Tuple[int, ...],
 90    split: Literal['train', 'val', 'test'],
 91    source: Literal['orig', 'norm'] = "orig",
 92    label_annotations: Literal['4', '35'] = "4",
 93    resize_inputs: bool = False,
 94    download: bool = False,
 95    **kwargs
 96) -> Dataset:
 97    """Get the OASIS dataset for tissue / anatomical segmentation.
 98
 99    Args:
100        path: Filepath to a folder where the data is downloaded for further processing.
101        patch_shape: The patch shape to use for training.
102        split: The choice of data split.
103        source: The source of inputs. Either 'orig' (original brain scans) or 'norm' (skull stripped).
104        label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy).
105        resize_inputs: Whether to resize inputs to the desired patch shape.
106        download: Whether to download the data if it is not present.
107        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
108
109    Returns:
110        The segmentation dataset.
111    """
112    raw_paths, label_paths = get_oasis_paths(path, split, source, label_annotations, download)
113
114    if resize_inputs:
115        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
116        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
117            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
118        )
119
120    return torch_em.default_segmentation_dataset(
121        raw_paths=raw_paths,
122        raw_key="data",
123        label_paths=label_paths,
124        label_key="data",
125        patch_shape=patch_shape,
126        is_seg_dataset=True,
127        **kwargs
128    )
129
130
def get_oasis_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    source: Literal['orig', 'norm'] = "orig",
    label_annotations: Literal['4', '35'] = "4",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build a DataLoader over the OASIS segmentation dataset (tissue or anatomical labels).

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        source: The source of inputs. Either 'orig' (original brain scans) or 'norm' (skull stripped).
        label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy).
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    oasis_dataset = get_oasis_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        source=source,
        label_annotations=label_annotations,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(oasis_dataset, batch_size, **dataloader_kwargs)
# Location of the neurite-OASIS v1.0 tar archive that `get_oasis_data` downloads.
URL = 'https://surfer.nmr.mgh.harvard.edu/ftp/data/neurite/data/neurite-oasis.v1.0.tar'
# Checksum handed to `util.download_source` for that archive
# (64 hex digits, presumably SHA-256 -- TODO confirm against `util.download_source`).
CHECKSUM = '86dd117dda17f736ade8a4088d7e98e066e1181950fe8b406f1a35f7fb743e78'
def get_oasis_data(path: Union[os.PathLike, str], download: bool = False):
def get_oasis_data(path: Union[os.PathLike, str], download: bool = False):
    """Fetch and unpack the OASIS archive unless it has already been extracted.

    Nothing is done when the `data` subfolder of `path` already exists.

    Args:
        path: Filepath to the folder that holds the archive and the extracted `data` subfolder.
        download: Whether to download the data if it is not present (forwarded to `util.download_source`).
    """
    extracted_dir = os.path.join(path, "data")
    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "neurite-oasis.v1.0.tar")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        # remove=False: the tar archive is kept next to the extracted data.
        util.unzip_tarfile(tar_path=archive, dst=extracted_dir, remove=False)

Download the OASIS dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
def get_oasis_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], source: Literal['orig', 'norm'] = 'orig', label_annotations: Literal['4', '35'] = '4', download: bool = False) -> Tuple[List[str], List[str]]:
def get_oasis_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'val', 'test'],
    source: Literal['orig', 'norm'] = "orig",
    label_annotations: Literal['4', '35'] = "4",
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the OASIS data.

    The patient folders (`OASIS_*`) are sorted by name and then divided by position:
    the first 350 form 'train', the next 25 form 'val' and the remainder forms 'test'.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        source: The source of inputs. Either 'orig' (original brain scans) or 'norm' (skull stripped).
        label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy).
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of 'train', 'val' or 'test'.
    """
    # Resolve the split first so that an invalid value fails before any download or extraction.
    if split == "train":
        selection = slice(None, 350)
    elif split == "val":
        selection = slice(350, 375)
    elif split == "test":
        selection = slice(375, None)
    else:
        raise ValueError(f"'{split}' is not a valid split.")

    get_oasis_data(path, download)

    # glob yields entries in arbitrary, filesystem-dependent order. Sorting makes the
    # positional splits reproducible, so the same patient always lands in the same split.
    patient_dirs = sorted(glob(os.path.join(path, "data", "OASIS_*")))
    raw_paths = [os.path.join(pdir, f"{source}.nii.gz") for pdir in patient_dirs]
    label_paths = [os.path.join(pdir, f"seg{label_annotations}.nii.gz") for pdir in patient_dirs]

    return raw_paths[selection], label_paths[selection]

Get paths to the OASIS data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • split: The choice of data split.
  • source: The source of inputs. Either 'orig' (original brain scans) or 'norm' (skull stripped).
  • label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy).
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_oasis_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], source: Literal['orig', 'norm'] = 'orig', label_annotations: Literal['4', '35'] = '4', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 88def get_oasis_dataset(
 89    path: Union[os.PathLike, str],
 90    patch_shape: Tuple[int, ...],
 91    split: Literal['train', 'val', 'test'],
 92    source: Literal['orig', 'norm'] = "orig",
 93    label_annotations: Literal['4', '35'] = "4",
 94    resize_inputs: bool = False,
 95    download: bool = False,
 96    **kwargs
 97) -> Dataset:
 98    """Get the OASIS dataset for tissue / anatomical segmentation.
 99
100    Args:
101        path: Filepath to a folder where the data is downloaded for further processing.
102        patch_shape: The patch shape to use for training.
103        split: The choice of data split.
104        source: The source of inputs. Either 'orig' (original brain scans) or 'norm' (skull stripped).
105        label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy).
106        resize_inputs: Whether to resize inputs to the desired patch shape.
107        download: Whether to download the data if it is not present.
108        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
109
110    Returns:
111        The segmentation dataset.
112    """
113    raw_paths, label_paths = get_oasis_paths(path, split, source, label_annotations, download)
114
115    if resize_inputs:
116        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
117        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
118            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
119        )
120
121    return torch_em.default_segmentation_dataset(
122        raw_paths=raw_paths,
123        raw_key="data",
124        label_paths=label_paths,
125        label_key="data",
126        patch_shape=patch_shape,
127        is_seg_dataset=True,
128        **kwargs
129    )

Get the OASIS dataset for tissue / anatomical segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • split: The choice of data split.
  • source: The source of inputs. Either 'orig' (original brain scans) or 'norm' (skull stripped).
  • label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy).
  • resize_inputs: Whether to resize inputs to the desired patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_oasis_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], source: Literal['orig', 'norm'] = 'orig', label_annotations: Literal['4', '35'] = '4', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_oasis_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    source: Literal['orig', 'norm'] = "orig",
    label_annotations: Literal['4', '35'] = "4",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build a DataLoader over the OASIS segmentation dataset (tissue or anatomical labels).

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        source: The source of inputs. Either 'orig' (original brain scans) or 'norm' (skull stripped).
        label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy).
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    oasis_dataset = get_oasis_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        source=source,
        label_annotations=label_annotations,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(oasis_dataset, batch_size, **dataloader_kwargs)

Get the OASIS dataloader for tissue / anatomical segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The choice of data split.
  • source: The source of inputs. Either 'orig' (original brain scans) or 'norm' (skull stripped).
  • label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy).
  • resize_inputs: Whether to resize inputs to the desired patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.