torch_em.data.datasets.medical.verse
The VerSe dataset contains annotations for vertebrae segmentation in CT scans.
This dataset is from the publication https://doi.org/10.1016/j.media.2021.102166. Please cite it if you use this dataset for your research.
1"""The VerSe dataset contains annotations for vertebrae segmentation in CT scans. 2 3This dataset is from the publication https://doi.org/10.1016/j.media.2021.102166. 4Please cite it if you use this dataset for your research. 5""" 6 7import os 8from glob import glob 9from natsort import natsorted 10from typing import Union, Tuple, Literal 11 12from torch.utils.data import Dataset, DataLoader 13 14import torch_em 15 16from .. import util 17 18 19URL = { 20 "train": "https://files.de-1.osf.io/v1/resources/4skx2/providers/osfstorage/5ffa463786541a01e714d390/?zip=", 21 "val": "https://files.de-1.osf.io/v1/resources/4skx2/providers/osfstorage/5ffa463686541a01eb15048c/?zip=", 22 "test": "https://files.de-1.osf.io/v1/resources/4skx2/providers/osfstorage/5ffa4635ba010901f0891bd0/?zip=" 23} 24 25# FIXME the checksums are not reliable (same behaviour spotted in PlantSeg downloads from osf) 26CHECKSUM = { 27 "train": None, 28 "val": None, 29 "test": None, 30} 31 32 33def get_verse_data( 34 path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False 35) -> str: 36 """Download the VerSe dataset. 37 38 Args: 39 path: Filepath to a folder where the data is downloaded for further processing. 40 split: The data split to use. Either 'train', 'val' or 'test'. 41 download: Whether to download the data if it is not present. 42 43 Returns: 44 Filepath where the data is downloaded. 45 """ 46 assert split in ["train", "val", "test"], f"'{split}' is not a valid split." 47 48 data_dir = os.path.join(path, "data", split) 49 if os.path.exists(data_dir): 50 return data_dir 51 52 os.makedirs(path, exist_ok=True) 53 54 zip_path = os.path.join(path, f"verse2020_{split}.zip") 55 util.download_source(path=zip_path, url=URL[split], download=download, checksum=CHECKSUM[split]) 56 util.unzip(zip_path=zip_path, dst=data_dir) 57 58 return data_dir 59 60 61def get_verse_paths( 62 path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False 63) -> str: 64 """Get paths to the VerSe data. 65 66 Args: 67 path: Filepath to a folder where the data is downloaded for further processing. 68 split: The data split to use. Either 'train', 'val' or 'test'. 69 download: Whether to download the data if it is not present. 70 71 Returns: 72 List of filepaths for the image data. 73 List of filepaths for the label data. 74 """ 75 data_dir = get_verse_data(path, split, download) 76 77 image_paths = natsorted(glob(os.path.join(data_dir, "rawdata", "*", "*_ct.nii.gz"))) 78 gt_paths = natsorted(glob(os.path.join(data_dir, "derivatives", "*", "*_msk.nii.gz"))) 79 80 return image_paths, gt_paths 81 82 83def get_verse_dataset( 84 path: Union[os.PathLike, str], 85 patch_shape: Tuple[int, ...], 86 split: Literal['train', 'val', 'test'], 87 resize_inputs: bool = False, 88 download: bool = False, 89 **kwargs 90) -> Dataset: 91 """Get the VerSe dataset for vertebrae segmentation. 92 93 Args: 94 path: Filepath to a folder where the data is downloaded for further processing. 95 patch_shape: The patch shape to use for training. 96 split: The data split to use. Either 'train', 'val' or 'test'. 97 resize_inputs: Whether to resize inputs to the desired patch shape. 98 download: Whether to download the data if it is not present. 99 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 100 101 Returns: 102 The segmentation dataset. 
103 """ 104 image_paths, gt_paths = get_verse_paths(path, split, download) 105 106 if resize_inputs: 107 resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False} 108 kwargs, patch_shape = util.update_kwargs_for_resize_trafo( 109 kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs 110 ) 111 112 return torch_em.default_segmentation_dataset( 113 raw_paths=image_paths, raw_key="data", label_paths=gt_paths, label_key="data", patch_shape=patch_shape, **kwargs 114 ) 115 116 117def get_verse_loader( 118 path: Union[os.PathLike, str], 119 batch_size: int, 120 patch_shape: Tuple[int, ...], 121 split: Literal['train', 'val', 'test'], 122 resize_inputs: bool = False, 123 download: bool = False, 124 **kwargs 125) -> DataLoader: 126 """Get the VerSe dataloader for vertebrae segmentation. 127 128 Args: 129 path: Filepath to a folder where the data is downloaded for further processing. 130 batch_size: The batch size for training. 131 patch_shape: The patch shape to use for training. 132 split: The data split to use. Either 'train', 'val' or 'test'. 133 resize_inputs: Whether to resize inputs to the desired patch shape. 134 download: Whether to download the data if it is not present. 135 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 136 137 Returns: 138 The DataLoader. 139 """ 140 ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) 141 dataset = get_verse_dataset(path, patch_shape, split, resize_inputs, download, **ds_kwargs) 142 return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL = {
    'train': 'https://files.de-1.osf.io/v1/resources/4skx2/providers/osfstorage/5ffa463786541a01e714d390/?zip=',
    'val': 'https://files.de-1.osf.io/v1/resources/4skx2/providers/osfstorage/5ffa463686541a01eb15048c/?zip=',
    'test': 'https://files.de-1.osf.io/v1/resources/4skx2/providers/osfstorage/5ffa4635ba010901f0891bd0/?zip='
}

CHECKSUM = {'train': None, 'val': None, 'test': None}
def get_verse_data(path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> str:
Download the VerSe dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The data split to use. Either 'train', 'val' or 'test'.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
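For instance, the download can be triggered up front, before any dataset or loader is constructed. A minimal sketch, assuming the data should live under a local "./data/verse" folder (the path is illustrative):

from torch_em.data.datasets.medical.verse import get_verse_data

# Download (or reuse, if already present) the training split and get the extracted data directory.
data_dir = get_verse_data(path="./data/verse", split="train", download=True)
print(data_dir)  # e.g. ./data/verse/data/train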
def get_verse_paths(path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]:
Get paths to the VerSe data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The data split to use. Either 'train', 'val' or 'test'.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data.
List of filepaths for the label data.
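A minimal sketch for inspecting the resolved NIfTI file pairs, assuming the same illustrative "./data/verse" folder as above:

from torch_em.data.datasets.medical.verse import get_verse_paths

# Resolve the CT scans ('*_ct.nii.gz') and their vertebrae masks ('*_msk.nii.gz').
image_paths, gt_paths = get_verse_paths(path="./data/verse", split="train", download=True)
print(len(image_paths), len(gt_paths))  # expected: one mask per scan
print(image_paths[0], gt_paths[0])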
def get_verse_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
Get the VerSe dataset for vertebrae segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train', 'val' or 'test'.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
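A minimal usage sketch; the path and the 3D patch shape below are illustrative choices for CT volumes, not values prescribed by the dataset:

from torch_em.data.datasets.medical.verse import get_verse_dataset

dataset = get_verse_dataset(
    path="./data/verse",
    patch_shape=(32, 256, 256),   # illustrative (z, y, x) patch shape
    split="train",
    resize_inputs=False,
    download=True,
)
print(len(dataset))  # number of patches sampled per epoch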
def get_verse_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
Get the VerSe dataloader for vertebrae segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train', 'val' or 'test'.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.
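A minimal training-style sketch, assuming the loader yields (raw, label) batches as torch tensors, like other torch_em segmentation loaders; the path, patch shape and batch size are illustrative:

from torch_em.data.datasets.medical.verse import get_verse_loader

loader = get_verse_loader(
    path="./data/verse",
    batch_size=2,
    patch_shape=(32, 256, 256),   # illustrative (z, y, x) patch shape
    split="train",
    download=True,
    num_workers=4,                # forwarded to the PyTorch DataLoader via **kwargs
)

# Assumption: each batch is a (raw, labels) pair.
x, y = next(iter(loader))
print(x.shape, y.shape)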