torch_em.data.datasets.medical.dca1

The DCA1 dataset contains annotations for artery segmentation in X-Ray Angiograms.

The database is located at http://personal.cimat.mx:8181/~ivan.cruz/DB_Angiograms.html. This dataset is from Cervantes-Sanchez et al. - https://doi.org/10.3390/app9245507. Please cite it if you use this dataset for your research.

View Source

  1"""The DCA1 dataset contains annotations for artery segmentation in X-Ray Angiograms.
  2
  3The database is located at http://personal.cimat.mx:8181/~ivan.cruz/DB_Angiograms.html.
  4This dataset is from Cervantes-Sanchez et al. - https://doi.org/10.3390/app9245507.
  5Please cite it if you use this dataset for your research.
  6"""
  7
  8import os
  9from glob import glob
 10from natsort import natsorted
 11from typing import Union, Tuple, Literal, List
 12
 13from torch.utils.data import Dataset, DataLoader
 14
 15import torch_em
 16
 17from .. import util
 18
 19
# Location of the zipped DCA1 database; this is the URL handed to `util.download_source`.
URL = "http://personal.cimat.mx:8181/~ivan.cruz/DB_Angiograms_files/DB_Angiograms_134.zip"
# Expected checksum of the zip file, passed to `util.download_source` together with the URL.
# NOTE(review): 64 hex characters, presumably a SHA256 digest - confirm against `util.download_source`.
CHECKSUM = "7161638a6e92c6a6e47a747db039292c8a1a6bad809aac0d1fd16a10a6f22a11"
 22
 23
 24def get_dca1_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 25    """Download the DCA1 dataset.
 26
 27    Args:
 28        path: Filepath to a folder where the data is downloaded for further processing.
 29        download: Whether to download the data if it is not present.
 30
 31    Returns:
 32        Filepath where the data is downloaded.
 33    """
 34    data_dir = os.path.join(path, "Database_134_Angiograms")
 35    if os.path.exists(data_dir):
 36        return data_dir
 37
 38    os.makedirs(path, exist_ok=True)
 39
 40    zip_path = os.path.join(path, "DB_Angiograms_134.zip")
 41    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 42    util.unzip(zip_path=zip_path, dst=path)
 43
 44    return data_dir
 45
 46
 47def get_dca1_paths(
 48    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
 49) -> Tuple[List[str], List[str]]:
 50    """Get paths to the DCA1 data.
 51
 52    Args:
 53        path: Filepath to a folder where the data is downloaded for further processing.
 54        split: The choice of data split.
 55        download: Whether to download the data if it is not present.
 56
 57    Returns:
 58        List of filepaths for the image data.
 59        List of filepaths for the label data.
 60    """
 61    data_dir = get_dca1_data(path=path, download=download)
 62
 63    image_paths, gt_paths = [], []
 64    for image_path in natsorted(glob(os.path.join(data_dir, "*.pgm"))):
 65        if image_path.endswith("_gt.pgm"):
 66            gt_paths.append(image_path)
 67        else:
 68            image_paths.append(image_path)
 69
 70    image_paths, gt_paths = natsorted(image_paths), natsorted(gt_paths)
 71
 72    if split == "train":  # first 85 images
 73        image_paths, gt_paths = image_paths[:-49], gt_paths[:-49]
 74    elif split == "val":  # 15 images
 75        image_paths, gt_paths = image_paths[-49:-34], gt_paths[-49:-34]
 76    elif split == "test":  # last 34 images
 77        image_paths, gt_paths = image_paths[-34:], gt_paths[-34:]
 78    else:
 79        raise ValueError(f"'{split}' is not a valid split.")
 80
 81    return image_paths, gt_paths
 82
 83
 84def get_dca1_dataset(
 85    path: Union[os.PathLike, str],
 86    patch_shape: Tuple[int, int],
 87    split: Literal["train", "val", "test"],
 88    resize_inputs: bool = False,
 89    download: bool = False,
 90    **kwargs
 91) -> Dataset:
 92    """Get the DCA1 dataset for coronary artery segmentation in x-ray angiograms.
 93
 94    Args:
 95        path: Filepath to a folder where the downloaded data will be saved.
 96        patch_shape: The patch shape to use for training.
 97        split: The choice of data split.
 98        resize_inputs: Whether to resize the inputs to the expected patch shape.
 99        download: Whether to download the data if it is not present.
100        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
101
102    Returns:
103        The segmentation dataset.
104    """
105    image_paths, gt_paths = get_dca1_paths(path, split, download)
106
107    if resize_inputs:
108        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
109        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
110            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
111        )
112
113    return torch_em.default_segmentation_dataset(
114        raw_paths=image_paths,
115        raw_key=None,
116        label_paths=gt_paths,
117        label_key=None,
118        patch_shape=patch_shape,
119        is_seg_dataset=False,
120        **kwargs
121    )
122
123
def get_dca1_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the DCA1 dataloader for coronary artery segmentation in x-ray angiograms.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dca1_dataset = get_dca1_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(dca1_dataset, batch_size, **dataloader_kwargs)

URL = 'http://personal.cimat.mx:8181/~ivan.cruz/DB_Angiograms_files/DB_Angiograms_134.zip'

CHECKSUM = '7161638a6e92c6a6e47a747db039292c8a1a6bad809aac0d1fd16a10a6f22a11'

def get_dca1_data(path: Union[os.PathLike, str], download: bool = False) -> str: View Source

def get_dca1_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the DCA1 data is available locally and return its location.

    If the extracted folder already exists below `path`, nothing is downloaded.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the folder that contains the extracted data.
    """
    extracted_dir = os.path.join(path, "Database_134_Angiograms")

    if not os.path.exists(extracted_dir):
        # Nothing extracted yet: fetch the archive into `path` and unpack it there.
        os.makedirs(path, exist_ok=True)
        archive_path = os.path.join(path, "DB_Angiograms_134.zip")
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive_path, dst=path)

    return extracted_dir

Download the DCA1 dataset.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
download: Whether to download the data if it is not present.

Returns:

Filepath where the data is downloaded.

def get_dca1_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]: View Source

def get_dca1_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the DCA1 data.

    The naturally sorted files are split by position: everything except the last 49 files
    is 'train', the following 15 files are 'val' and the final 34 files are 'test'.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of 'train', 'val' or 'test'.
        RuntimeError: If the number of image files and label files differ.
    """
    # Validate the split first so that a typo fails fast, before any download / unzip is attempted.
    if split not in ("train", "val", "test"):
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_dca1_data(path=path, download=download)

    # Label files carry the '_gt.pgm' suffix; every other '.pgm' file is an image.
    # Partitioning an already sorted list keeps both partitions sorted, so one sort is enough.
    image_paths, gt_paths = [], []
    for file_path in natsorted(glob(os.path.join(data_dir, "*.pgm"))):
        if file_path.endswith("_gt.pgm"):
            gt_paths.append(file_path)
        else:
            image_paths.append(file_path)

    # The split is purely positional, so images and labels only stay paired if both lists have equal length.
    if len(image_paths) != len(gt_paths):
        raise RuntimeError(
            f"Found {len(image_paths)} images but {len(gt_paths)} label files in '{data_dir}'."
        )

    # Slices are anchored at the end of the list: 85 / 15 / 34 files for the full 134-image database.
    split_slices = {
        "train": slice(None, -49),
        "val": slice(-49, -34),
        "test": slice(-34, None),
    }
    selection = split_slices[split]

    return image_paths[selection], gt_paths[selection]

Get paths to the DCA1 data.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
split: The choice of data split.
download: Whether to download the data if it is not present.

Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_dca1_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

 85def get_dca1_dataset(
 86    path: Union[os.PathLike, str],
 87    patch_shape: Tuple[int, int],
 88    split: Literal["train", "val", "test"],
 89    resize_inputs: bool = False,
 90    download: bool = False,
 91    **kwargs
 92) -> Dataset:
 93    """Get the DCA1 dataset for coronary artery segmentation in x-ray angiograms.
 94
 95    Args:
 96        path: Filepath to a folder where the downloaded data will be saved.
 97        patch_shape: The patch shape to use for training.
 98        split: The choice of data split.
 99        resize_inputs: Whether to resize the inputs to the expected patch shape.
100        download: Whether to download the data if it is not present.
101        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
102
103    Returns:
104        The segmentation dataset.
105    """
106    image_paths, gt_paths = get_dca1_paths(path, split, download)
107
108    if resize_inputs:
109        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
110        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
111            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
112        )
113
114    return torch_em.default_segmentation_dataset(
115        raw_paths=image_paths,
116        raw_key=None,
117        label_paths=gt_paths,
118        label_key=None,
119        patch_shape=patch_shape,
120        is_seg_dataset=False,
121        **kwargs
122    )

Get the DCA1 dataset for coronary artery segmentation in x-ray angiograms.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
patch_shape: The patch shape to use for training.
split: The choice of data split.
resize_inputs: Whether to resize the inputs to the expected patch shape.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_dca1_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_dca1_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the DCA1 dataloader for coronary artery segmentation in x-ray angiograms.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dca1_dataset = get_dca1_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(dca1_dataset, batch_size, **dataloader_kwargs)

Get the DCA1 dataloader for coronary artery segmentation in x-ray angiograms.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
batch_size: The batch size for training.
patch_shape: The patch shape to use for training.
split: The choice of data split.
resize_inputs: Whether to resize the inputs to the expected patch shape.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.