torch_em.data.datasets.medical.fundus_avseg

The Fundus AVSeg dataset contains annotations for artery-vein segmentation in fundus images.

For the class labels: red represents arteries, blue represents veins, green represents artery-vein crossings, and white represents vessels of uncertain classification.

This dataset is from the publication https://doi.org/10.1038/s41597-025-05381-2. Please cite it if you use this dataset for your research.

  1"""The Fundus AVSeg dataset contains annotations for artery-vein segmentation in
  2fundus images.
  3
  4For the class labels: red represents arteries, blue represents veins, green represents artery-vein crossings,
  5and white represents vessels of uncertain classification.
  6
  7This dataset is from the publication https://doi.org/10.1038/s41597-025-05381-2.
  8Please cite it if you use this dataset for your research.
  9"""
 10
 11import os
 12from glob import glob
 13from pathlib import Path
 14from typing import Union, Tuple, Literal, List
 15
 16import numpy as np
 17import pandas as pd
 18import imageio.v3 as imageio
 19
 20from torch.utils.data import Dataset, DataLoader
 21
 22import torch_em
 23
 24from .. import util
 25
 26
# Download link (figshare) for the zipped Fundus AVSeg archive.
URL = "https://figshare.com/ndownloader/files/54093641"
# Checksum handed to `util.download_source` for the archive (64 hex digits, presumably SHA-256 - TODO confirm).
CHECKSUM = "6db5ff43c4e9c25aa93093aa295c67b10fa0c089ac650df6665c7a6bbae9539f"
 29
 30
 31def _process_labels(data_dir):
 32    label_paths = glob(os.path.join(data_dir, "annotation", "*.png"))
 33    for label_path in label_paths:
 34        labels = imageio.imread(label_path)
 35
 36        # New empty label.
 37        neu_labels = np.zeros(labels.shape[:2])
 38
 39        # Map labels to specific ids.
 40        neu_labels[np.all(labels == (255, 0, 0), axis=-1)] = 1   # red are arteries.
 41        neu_labels[np.all(labels == (0, 0, 255), axis=-1)] = 2   # blue are veins.
 42        neu_labels[np.all(labels == (0, 255,   0), axis=-1)] = 3   # green are overlaps.
 43        neu_labels[np.all(labels == (255, 255, 255), axis=-1)] = 4   # white are unknown.
 44
 45        imageio.imwrite(Path(label_path).with_suffix(".tif"), neu_labels, compression="zlib")
 46
 47        os.remove(label_path)
 48
 49
 50def get_fundus_avseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 51    """Download the Fundus AVSeg data.
 52
 53    Args:
 54        path: Filepath to a folder where the data is downloaded for further processing.
 55        download: Whether to download the data if it is not present.
 56
 57    Returns:
 58        Folder where the data is stored.
 59    """
 60    data_dir = os.path.join(path, "Fundus-AVSeg")
 61    if os.path.exists(data_dir):
 62        return data_dir
 63
 64    os.makedirs(path, exist_ok=True)
 65
 66    zip_path = os.path.join(path, "Fundus_AVSeg.zip")
 67    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 68    util.unzip(zip_path=zip_path, dst=path)
 69
 70    _process_labels(data_dir)
 71
 72    return data_dir
 73
 74
 75def get_fundus_avseg_paths(
 76    path: Union[os.PathLike, str],
 77    split: Literal["train", "val", "test"],
 78    download: bool = False
 79) -> Tuple[List[str], List[str]]:
 80    """Download the Fundus AVSeg data.
 81
 82    Args:
 83        path: Filepath to a folder where the data is downloaded for further processing.
 84        split: The data split to use. Either 'train', 'val' or 'test'.
 85        download: Whether to download the data if it is not present.
 86
 87    Returns:
 88        List of filepaths for the image data.
 89        List of filepaths for the label data.
 90    """
 91    data_dir = get_fundus_avseg_data(path, download)
 92
 93    if split == "test":
 94        df = pd.read_csv(os.path.join(data_dir, "testing.txt"))
 95    elif split in ["train", "val"]:
 96        df = pd.read_csv(os.path.join(data_dir, "training.txt"))
 97    else:
 98        raise ValueError(f"'{split}' is not a valid split choice.")
 99
100    fnames = df.iloc[:, 0].tolist()
101
102    if split == "train":
103        fnames = fnames[:-15]
104    elif split == "val":  # Select last 15 images for validation.
105        fnames = fnames[-15:]
106
107    raw_paths = [os.path.join(data_dir, "images", fname) for fname in fnames]
108    label_paths = [
109        str(Path(os.path.join(data_dir, "annotation", fname)).with_suffix(".tif")) for fname in fnames
110    ]
111
112    return raw_paths, label_paths
113
114
def get_fundus_avseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the Fundus AVSeg dataset for artery-vein segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_fundus_avseg_paths(path, split, download)

    if resize_inputs:
        # The fundus images are RGB, which the resize transform is told via 'is_rgb'.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    # Images and labels are plain 2d image files, hence no internal keys are given.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        ndim=2,
        **kwargs
    )
    return dataset
154
155
def get_fundus_avseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the Fundus AVSeg dataloader for artery-vein segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size passed on to `torch_em.get_data_loader`.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the dataset arguments from the ones meant for the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_fundus_avseg_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        resize_inputs=resize_inputs,
        download=download,
        **ds_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
# Download link (figshare) for the zipped Fundus AVSeg archive.
URL = 'https://figshare.com/ndownloader/files/54093641'
# Checksum handed to `util.download_source` for the archive (64 hex digits, presumably SHA-256 - TODO confirm).
CHECKSUM = '6db5ff43c4e9c25aa93093aa295c67b10fa0c089ac650df6665c7a6bbae9539f'
def get_fundus_avseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_fundus_avseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Fetch and prepare the Fundus AVSeg data.

    If the 'Fundus-AVSeg' folder already exists under `path` it is returned as is.
    Otherwise the zip archive is downloaded and extracted into `path`, and the RGB
    annotations are converted by `_process_labels`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Folder where the data is stored.
    """
    data_dir = os.path.join(path, "Fundus-AVSeg")

    # Download, unpack and convert only when the extracted folder is missing.
    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)

        archive_path = os.path.join(path, "Fundus_AVSeg.zip")
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive_path, dst=path)

        _process_labels(data_dir)

    return data_dir

Download the Fundus AVSeg data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

Folder where the data is stored.

def get_fundus_avseg_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]:
 76def get_fundus_avseg_paths(
 77    path: Union[os.PathLike, str],
 78    split: Literal["train", "val", "test"],
 79    download: bool = False
 80) -> Tuple[List[str], List[str]]:
 81    """Download the Fundus AVSeg data.
 82
 83    Args:
 84        path: Filepath to a folder where the data is downloaded for further processing.
 85        split: The data split to use. Either 'train', 'val' or 'test'.
 86        download: Whether to download the data if it is not present.
 87
 88    Returns:
 89        List of filepaths for the image data.
 90        List of filepaths for the label data.
 91    """
 92    data_dir = get_fundus_avseg_data(path, download)
 93
 94    if split == "test":
 95        df = pd.read_csv(os.path.join(data_dir, "testing.txt"))
 96    elif split in ["train", "val"]:
 97        df = pd.read_csv(os.path.join(data_dir, "training.txt"))
 98    else:
 99        raise ValueError(f"'{split}' is not a valid split choice.")
100
101    fnames = df.iloc[:, 0].tolist()
102
103    if split == "train":
104        fnames = fnames[:-15]
105    elif split == "val":  # Select last 15 images for validation.
106        fnames = fnames[-15:]
107
108    raw_paths = [os.path.join(data_dir, "images", fname) for fname in fnames]
109    label_paths = [
110        str(Path(os.path.join(data_dir, "annotation", fname)).with_suffix(".tif")) for fname in fnames
111    ]
112
113    return raw_paths, label_paths

Get the filepaths to the Fundus AVSeg images and labels.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_fundus_avseg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_fundus_avseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the Fundus AVSeg dataset for artery-vein segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_fundus_avseg_paths(path, split, download)

    if resize_inputs:
        # The fundus images are RGB, which the resize transform is told via 'is_rgb'.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    # Images and labels are plain 2d image files, hence no internal keys are given.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        ndim=2,
        **kwargs
    )
    return dataset

Get the Fundus AVSeg dataset for artery-vein segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • resize_inputs: Whether to resize the inputs to the patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_fundus_avseg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_fundus_avseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the Fundus AVSeg dataloader for artery-vein segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size passed on to `torch_em.get_data_loader`.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the dataset arguments from the ones meant for the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_fundus_avseg_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        resize_inputs=resize_inputs,
        download=download,
        **ds_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)

Get the Fundus AVSeg dataloader for artery-vein segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size passed on to the data loader.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • resize_inputs: Whether to resize the inputs to the patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.