torch_em.data.datasets.medical.dca1
The DCA1 dataset contains annotations for artery segmentation in X-Ray Angiograms.
The database is located at http://personal.cimat.mx:8181/~ivan.cruz/DB_Angiograms.html. This dataset is from Cervantes-Sanchez et al. - https://doi.org/10.3390/app9245507. Please cite it if you use this dataset for your research.
"""The DCA1 dataset contains annotations for artery segmentation in X-Ray Angiograms.

The database is located at http://personal.cimat.mx:8181/~ivan.cruz/DB_Angiograms.html.
This dataset is from Cervantes-Sanchez et al. - https://doi.org/10.3390/app9245507.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "http://personal.cimat.mx:8181/~ivan.cruz/DB_Angiograms_files/DB_Angiograms_134.zip"
CHECKSUM = "7161638a6e92c6a6e47a747db039292c8a1a6bad809aac0d1fd16a10a6f22a11"


def get_dca1_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the DCA1 dataset.

    If the extracted `Database_134_Angiograms` folder already exists inside `path`,
    nothing is downloaded and the folder is returned directly.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    data_dir = os.path.join(path, "Database_134_Angiograms")
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "DB_Angiograms_134.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def get_dca1_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the DCA1 data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of 'train', 'val' or 'test'.
    """
    # Resolve the split before touching the filesystem, so that an invalid split
    # is rejected without triggering a download or an extraction.
    # The counts in the comments assume the 134 image / label pairs of the archive.
    if split == "train":  # first 85 images
        selection = slice(None, -49)
    elif split == "val":  # 15 images
        selection = slice(-49, -34)
    elif split == "test":  # last 34 images
        selection = slice(-34, None)
    else:
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_dca1_data(path=path, download=download)

    # Labels carry the '_gt.pgm' suffix; every other '.pgm' file is an image.
    # The files are sorted once up front, and both lists inherit that order.
    image_paths, gt_paths = [], []
    for file_path in natsorted(glob(os.path.join(data_dir, "*.pgm"))):
        if file_path.endswith("_gt.pgm"):
            gt_paths.append(file_path)
        else:
            image_paths.append(file_path)

    return image_paths[selection], gt_paths[selection]


def get_dca1_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the DCA1 dataset for coronary artery segmentation in x-ray angiograms.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_dca1_paths(path, split, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )


def get_dca1_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the DCA1 dataloader for coronary artery segmentation in x-ray angiograms.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_dca1_dataset(path, patch_shape, split, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'http://personal.cimat.mx:8181/~ivan.cruz/DB_Angiograms_files/DB_Angiograms_134.zip'
CHECKSUM =
'7161638a6e92c6a6e47a747db039292c8a1a6bad809aac0d1fd16a10a6f22a11'
def
get_dca1_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_dca1_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the DCA1 data is available locally and return its folder.

    When the extracted `Database_134_Angiograms` folder is already present inside `path`,
    it is returned without any further work. Otherwise `path` is created if needed, the
    archive is fetched via `util.download_source` and extracted into `path` via `util.unzip`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    data_dir = os.path.join(path, "Database_134_Angiograms")

    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)

        archive_path = os.path.join(path, "DB_Angiograms_134.zip")
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive_path, dst=path)

    return data_dir
Download the DCA1 dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def
get_dca1_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]:
def get_dca1_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the DCA1 data.

    The '.pgm' files in the data folder are sorted naturally and separated into
    labels (files ending in '_gt.pgm') and images (all other files). The requested
    split is then cut from the end-anchored slices of both lists.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of 'train', 'val' or 'test'.
    """
    # Resolve the split before touching the filesystem, so that an invalid split
    # is rejected without triggering a download or an extraction.
    # The counts in the comments assume the 134 image / label pairs of the archive.
    if split == "train":  # first 85 images
        selection = slice(None, -49)
    elif split == "val":  # 15 images
        selection = slice(-49, -34)
    elif split == "test":  # last 34 images
        selection = slice(-34, None)
    else:
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_dca1_data(path=path, download=download)

    # The files are sorted once up front; splitting them into two lists keeps
    # that order, so no second sorting pass is needed.
    image_paths, gt_paths = [], []
    for file_path in natsorted(glob(os.path.join(data_dir, "*.pgm"))):
        if file_path.endswith("_gt.pgm"):
            gt_paths.append(file_path)
        else:
            image_paths.append(file_path)

    return image_paths[selection], gt_paths[selection]
Get paths to the DCA1 data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The choice of data split.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_dca1_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_dca1_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the DCA1 dataset for coronary artery segmentation in x-ray angiograms.

    The image and label file lists of the requested split are handed to
    `torch_em.default_segmentation_dataset`. If `resize_inputs` is set, the keyword
    arguments and the patch shape are first passed through
    `util.update_kwargs_for_resize_trafo` with `is_rgb=False`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_dca1_paths(path, split, download)

    if resize_inputs:
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset
Get the DCA1 dataset for coronary artery segmentation in x-ray angiograms.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- resize_inputs: Whether to resize the inputs to the expected patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_dca1_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_dca1_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the DCA1 dataloader for coronary artery segmentation in x-ray angiograms.

    The extra keyword arguments are divided by `util.split_kwargs` into those for
    `torch_em.default_segmentation_dataset` and the remaining ones, which are
    forwarded to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dca1_dataset = get_dca1_dataset(path, patch_shape, split, resize_inputs, download, **dataset_kwargs)

    loader = torch_em.get_data_loader(dca1_dataset, batch_size, **dataloader_kwargs)
    return loader
Get the DCA1 dataloader for coronary artery segmentation in x-ray angiograms.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- resize_inputs: Whether to resize the inputs to the expected patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.