torch_em.data.datasets.medical.oasis
The OASIS dataset contains two sets of annotations for brain T1 MRI: one with 4 tissue labels and one with 35 anatomical labels.
The dataset comes from https://github.com/adalca/medical-datasets/blob/master/neurite-oasis.md.
This dataset is from the following publications:
- https://doi.org/10.59275/j.melba.2022-74f1
- https://doi.org/10.1162/jocn.2007.19.9.1498
Please cite them if you use this dataset for your research.
1"""The OASIS dataset contains two set of annotations: 2one for 4 tissue segmentation and 35 anatomical segmentation in brain T1 MRI. 3 4The dataset comes from https://github.com/adalca/medical-datasets/blob/master/neurite-oasis.md. 5 6This dataset is from the following publications: 7- https://doi.org/10.59275/j.melba.2022-74f1 8- https://doi.org/10.1162/jocn.2007.19.9.1498 9 10Please cite them if you use this dataset for your research. 11""" 12 13import os 14from glob import glob 15from typing import Union, Tuple, Literal, List 16 17from torch.utils.data import Dataset, DataLoader 18 19import torch_em 20 21from .. import util 22 23 24URL = "https://surfer.nmr.mgh.harvard.edu/ftp/data/neurite/data/neurite-oasis.v1.0.tar" 25CHECKSUM = "86dd117dda17f736ade8a4088d7e98e066e1181950fe8b406f1a35f7fb743e78" 26 27 28def get_oasis_data(path: Union[os.PathLike, str], download: bool = False): 29 """Download the OASIS dataset. 30 31 Args: 32 path: Filepath to a folder where the data is downloaded for further processing. 33 download: Whether to download the data if it is not present. 34 """ 35 data_path = os.path.join(path, "data") 36 if os.path.exists(data_path): 37 return 38 39 os.makedirs(path, exist_ok=True) 40 tar_path = os.path.join(path, "neurite-oasis.v1.0.tar") 41 util.download_source(path=tar_path, url=URL, download=download, checksum=CHECKSUM) 42 util.unzip_tarfile(tar_path=tar_path, dst=data_path, remove=False) 43 44 45def get_oasis_paths( 46 path: Union[os.PathLike, str], 47 split: Literal['train', 'val', 'test'], 48 source: Literal['orig', 'norm'] = "orig", 49 label_annotations: Literal['4', '35'] = "4", 50 download: bool = False 51) -> Tuple[List[str], List[str]]: 52 """Get paths to the OASIS data. 53 54 Args: 55 path: Filepath to a folder where the data is downloaded for further processing. 56 split: The choice of data split. 57 source: The source of inputs. Either 'orig' (original brain scans) or 'norm' (skull stripped). 58 label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy). 59 download: Whether to download the data if it is not present. 60 61 Returns: 62 List of filepaths for the image data. 63 List of filepaths for the label data. 64 """ 65 get_oasis_data(path, download) 66 67 patient_dirs = glob(os.path.join(path, "data", "OASIS_*")) 68 raw_paths, label_paths = [], [] 69 for pdir in patient_dirs: 70 raw_paths.append(os.path.join(pdir, f"{source}.nii.gz")) 71 label_paths.append(os.path.join(pdir, f"seg{label_annotations}.nii.gz")) 72 73 if split == "train": 74 raw_paths, label_paths = raw_paths[:350], label_paths[:350] 75 elif split == "val": 76 raw_paths, label_paths = raw_paths[350:375], label_paths[350:375] 77 elif split == "test": 78 raw_paths, label_paths = raw_paths[375:], label_paths[375:] 79 else: 80 raise ValueError(f"'{split}' is not a valid split.") 81 82 assert len(raw_paths) == len(label_paths) 83 84 return raw_paths, label_paths 85 86 87def get_oasis_dataset( 88 path: Union[os.PathLike, str], 89 patch_shape: Tuple[int, ...], 90 split: Literal['train', 'val', 'test'], 91 source: Literal['orig', 'norm'] = "orig", 92 label_annotations: Literal['4', '35'] = "4", 93 resize_inputs: bool = False, 94 download: bool = False, 95 **kwargs 96) -> Dataset: 97 """Get the OASIS dataset for tissue / anatomical segmentation. 98 99 Args: 100 path: Filepath to a folder where the data is downloaded for further processing. 101 patch_shape: The patch shape to use for training. 102 split: The choice of data split. 103 source: The source of inputs. 
Either 'orig' (original brain scans) or 'norm' (skull stripped). 104 label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy). 105 resize_inputs: Whether to resize inputs to the desired patch shape. 106 download: Whether to download the data if it is not present. 107 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 108 109 Returns: 110 The segmentation dataset. 111 """ 112 raw_paths, label_paths = get_oasis_paths(path, split, source, label_annotations, download) 113 114 if resize_inputs: 115 resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False} 116 kwargs, patch_shape = util.update_kwargs_for_resize_trafo( 117 kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs 118 ) 119 120 return torch_em.default_segmentation_dataset( 121 raw_paths=raw_paths, 122 raw_key="data", 123 label_paths=label_paths, 124 label_key="data", 125 patch_shape=patch_shape, 126 is_seg_dataset=True, 127 **kwargs 128 ) 129 130 131def get_oasis_loader( 132 path: Union[os.PathLike, str], 133 batch_size: int, 134 patch_shape: Tuple[int, ...], 135 split: Literal['train', 'val', 'test'], 136 source: Literal['orig', 'norm'] = "orig", 137 label_annotations: Literal['4', '35'] = "4", 138 resize_inputs: bool = False, 139 download: bool = False, 140 **kwargs 141) -> DataLoader: 142 """Get the OASIS dataloader for tissue / anatomical segmentation. 143 144 Args: 145 path: Filepath to a folder where the data is downloaded for further processing. 146 batch_size: The batch size for training. 147 patch_shape: The patch shape to use for training. 148 split: The choice of data split. 149 source: The source of inputs. Either 'orig' (original brain scans) or 'norm' (skull stripped). 150 label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy). 151 resize_inputs: Whether to resize inputs to the desired patch shape. 152 download: Whether to download the data if it is not present. 153 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 154 155 Returns: 156 The DataLoader. 157 """ 158 ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) 159 dataset = get_oasis_dataset( 160 path, patch_shape, split, source, label_annotations, resize_inputs, download, **ds_kwargs 161 ) 162 return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL = 'https://surfer.nmr.mgh.harvard.edu/ftp/data/neurite/data/neurite-oasis.v1.0.tar'
CHECKSUM = '86dd117dda17f736ade8a4088d7e98e066e1181950fe8b406f1a35f7fb743e78'
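If you fetch the tarball by other means, you can check it against CHECKSUM yourself with the standard library. This is a small sketch; the file location is illustrative, and the digest is assumed to be SHA256 (suggested by its 64 hex characters).

```python
import hashlib

from torch_em.data.datasets.medical.oasis import CHECKSUM

tar_path = "./oasis/neurite-oasis.v1.0.tar"  # illustrative download location

sha256 = hashlib.sha256()
with open(tar_path, "rb") as f:
    # Hash the archive in 1 MiB chunks to keep memory usage flat.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

print(sha256.hexdigest() == CHECKSUM)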
def get_oasis_data(path: Union[os.PathLike, str], download: bool = False):
Download the OASIS dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
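For example, with an illustrative target folder of "./oasis", the download can be triggered as follows; the function unpacks the archive into a "data" subfolder and returns immediately on subsequent calls.

```python
from torch_em.data.datasets.medical.oasis import get_oasis_data

# "./oasis" is an illustrative path; the archive is unpacked into "./oasis/data".
get_oasis_data("./oasis", download=True)
```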
def get_oasis_paths(path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], source: Literal['orig', 'norm'] = 'orig', label_annotations: Literal['4', '35'] = '4', download: bool = False) -> Tuple[List[str], List[str]]:
Get paths to the OASIS data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The choice of data split.
- source: The source of inputs. Either 'orig' (original brain scans) or 'norm' (skull stripped).
- label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy).
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data.
List of filepaths for the label data.
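As a sketch (the folder is illustrative; per the fixed slicing in the source, the first 350 entries form the training split):

```python
from torch_em.data.datasets.medical.oasis import get_oasis_paths

# Skull-stripped inputs ('norm') with the 4-class tissue labels.
raw_paths, label_paths = get_oasis_paths(
    "./oasis", split="train", source="norm", label_annotations="4", download=True
)
print(len(raw_paths))  # 350 scans in the training split
print(raw_paths[0])    # e.g. ./oasis/data/OASIS_<id>/norm.nii.gz
```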
def get_oasis_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], source: Literal['orig', 'norm'] = 'orig', label_annotations: Literal['4', '35'] = '4', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
Get the OASIS dataset for tissue / anatomical segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- source: The source of inputs. Either 'orig' (original brain scans) or 'norm' (skull stripped).
- label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy).
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
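A minimal sketch of constructing the dataset; the folder and the 3D patch shape are illustrative choices, not values prescribed by the module:

```python
from torch_em.data.datasets.medical.oasis import get_oasis_dataset

# 35-class anatomical segmentation on the original ('orig') scans.
dataset = get_oasis_dataset(
    path="./oasis",
    patch_shape=(64, 64, 64),  # illustrative 3D patch shape
    split="train",
    source="orig",
    label_annotations="35",
    download=True,
)
print(len(dataset))
```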
def get_oasis_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], source: Literal['orig', 'norm'] = 'orig', label_annotations: Literal['4', '35'] = '4', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
Get the OASIS dataloader for tissue / anatomical segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- source: The source of inputs. Either 'orig' (original brain scans) or 'norm' (skull stripped).
- label_annotations: The set of annotations. Either '4' (for tissues) or '35' (for anatomy).
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.
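Putting it together, a training-ready loader can be built as below. The path, patch shape, batch size and worker count are illustrative; keyword arguments that belong to the PyTorch DataLoader (such as num_workers) are split off and forwarded to it.

```python
from torch_em.data.datasets.medical.oasis import get_oasis_loader

loader = get_oasis_loader(
    path="./oasis",
    batch_size=2,
    patch_shape=(32, 128, 128),  # illustrative 3D patch shape
    split="val",
    source="norm",
    label_annotations="4",
    download=True,
    num_workers=4,               # forwarded to the PyTorch DataLoader
)

# Each batch is a pair of image and label tensors.
x, y = next(iter(loader))
print(x.shape, y.shape)
```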