torch_em.data.datasets.medical.fundus_avseg
The Fundus AVSeg dataset contains annotations for artery-vein segmentation in fundus images.
For the class labels: red represents arteries, blue represents veins, green represents artery-vein crossings, and white represents vessels of uncertain classification.
This dataset is from the publication https://doi.org/10.1038/s41597-025-05381-2. Please cite it if you use this dataset for your research.
"""The Fundus AVSeg dataset contains annotations for artery-vein segmentation in
fundus images.

For the class labels: red represents arteries, blue represents veins, green represents artery-vein crossings,
and white represents vessels of uncertain classification.

This dataset is from the publication https://doi.org/10.1038/s41597-025-05381-2.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from pathlib import Path
from typing import Union, Tuple, Literal, List

import numpy as np
import pandas as pd
import imageio.v3 as imageio

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://figshare.com/ndownloader/files/54093641"
CHECKSUM = "6db5ff43c4e9c25aa93093aa295c67b10fa0c089ac650df6665c7a6bbae9539f"


def _process_labels(data_dir):
    """Convert the colour-coded annotation PNGs into integer label images.

    Every `*.png` in `<data_dir>/annotation` is read, mapped to label ids and written
    next to the original as a zlib-compressed `.tif`; the PNG is removed afterwards.
    Pixels that match none of the four annotation colours keep the id 0.

    Args:
        data_dir: Folder that contains the `annotation` subfolder.
    """
    # (R, G, B) annotation colour -> label id.
    color_to_id = (
        ((255, 0, 0), 1),  # red are arteries.
        ((0, 0, 255), 2),  # blue are veins.
        ((0, 255, 0), 3),  # green are overlaps.
        ((255, 255, 255), 4),  # white are unknown.
    )

    label_paths = glob(os.path.join(data_dir, "annotation", "*.png"))
    for label_path in label_paths:
        labels = imageio.imread(label_path)

        # New empty label. Only five ids are stored, so use an integer dtype
        # instead of numpy's float64 default.
        neu_labels = np.zeros(labels.shape[:2], dtype="uint8")

        # Map labels to specific ids.
        for color, label_id in color_to_id:
            neu_labels[np.all(labels == color, axis=-1)] = label_id

        imageio.imwrite(Path(label_path).with_suffix(".tif"), neu_labels, compression="zlib")

        os.remove(label_path)


def get_fundus_avseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the Fundus AVSeg data.

    If the `Fundus-AVSeg` folder already exists under `path`, nothing is downloaded
    or converted and the folder is returned directly.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Folder where the data is stored.
    """
    data_dir = os.path.join(path, "Fundus-AVSeg")
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "Fundus_AVSeg.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    _process_labels(data_dir)

    return data_dir


def get_fundus_avseg_paths(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"],
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get the paths to the Fundus AVSeg images and labels for a given split.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of 'train', 'val' or 'test'.
    """
    # Validate first so that an invalid split does not trigger a download.
    if split not in ("train", "val", "test"):
        raise ValueError(f"'{split}' is not a valid split choice.")

    data_dir = get_fundus_avseg_data(path, download)

    # 'train' and 'val' are both taken from the training file list.
    split_file = "testing.txt" if split == "test" else "training.txt"
    # NOTE(review): `pd.read_csv` treats the first line as a header by default.
    # Confirm the split files start with a header row; otherwise the first
    # filename is silently dropped.
    df = pd.read_csv(os.path.join(data_dir, split_file))
    fnames = df.iloc[:, 0].tolist()

    if split == "train":
        fnames = fnames[:-15]
    elif split == "val":  # Select last 15 images for validation.
        fnames = fnames[-15:]

    raw_paths = [os.path.join(data_dir, "images", fname) for fname in fnames]
    # The annotations were converted to '.tif', so swap the file extension.
    label_paths = [
        str(Path(os.path.join(data_dir, "annotation", fname)).with_suffix(".tif")) for fname in fnames
    ]

    return raw_paths, label_paths


def get_fundus_avseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the Fundus AVSeg dataset for artery-vein segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_fundus_avseg_paths(path, split, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        ndim=2,
        **kwargs
    )


def get_fundus_avseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the Fundus AVSeg dataloader for artery-vein segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_fundus_avseg_dataset(path, patch_shape, split, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL = 'https://figshare.com/ndownloader/files/54093641'
CHECKSUM = '6db5ff43c4e9c25aa93093aa295c67b10fa0c089ac650df6665c7a6bbae9539f'
def get_fundus_avseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_fundus_avseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make the Fundus AVSeg data available locally and return its folder.

    When the `Fundus-AVSeg` folder already exists under `path`, it is returned as-is.
    Otherwise the archive is obtained via `util.download_source`, unpacked into `path`,
    and the annotations are post-processed with `_process_labels`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Folder where the data is stored.
    """
    data_dir = os.path.join(path, "Fundus-AVSeg")

    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)

        archive = os.path.join(path, "Fundus_AVSeg.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

        # One-off conversion of the extracted annotations.
        _process_labels(data_dir)

    return data_dir
Download the Fundus AVSeg data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Folder where the data is stored.
def get_fundus_avseg_paths(path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]:
def get_fundus_avseg_paths(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"],
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get the paths to the Fundus AVSeg images and labels for a given split.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of 'train', 'val' or 'test'.
    """
    # Validate first so that an invalid split does not trigger a download.
    if split not in ("train", "val", "test"):
        raise ValueError(f"'{split}' is not a valid split choice.")

    data_dir = get_fundus_avseg_data(path, download)

    # 'train' and 'val' are both taken from the training file list.
    split_file = "testing.txt" if split == "test" else "training.txt"
    # NOTE(review): `pd.read_csv` treats the first line as a header by default.
    # Confirm the split files start with a header row; otherwise the first
    # filename is silently dropped.
    df = pd.read_csv(os.path.join(data_dir, split_file))
    fnames = df.iloc[:, 0].tolist()

    if split == "train":
        fnames = fnames[:-15]
    elif split == "val":  # Select last 15 images for validation.
        fnames = fnames[-15:]

    raw_paths = [os.path.join(data_dir, "images", fname) for fname in fnames]
    # Label files share the image name but carry a '.tif' extension.
    label_paths = [
        str(Path(os.path.join(data_dir, "annotation", fname)).with_suffix(".tif")) for fname in fnames
    ]

    return raw_paths, label_paths
Get the paths to the Fundus AVSeg images and labels for the chosen split.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The data split to use. Either 'train', 'val' or 'test'.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_fundus_avseg_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_fundus_avseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the Fundus AVSeg segmentation dataset for artery-vein segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, annotation_paths = get_fundus_avseg_paths(path, split, download)

    if resize_inputs:
        # The helper returns the updated dataset kwargs together with the
        # patch shape that is passed on to the dataset below.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=annotation_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        ndim=2,
        **kwargs
    )
    return dataset
Get the Fundus AVSeg dataset for artery-vein segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train', 'val' or 'test'.
- resize_inputs: Whether to resize the inputs to the patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_fundus_avseg_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_fundus_avseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the Fundus AVSeg dataloader for artery-vein segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size handed to `torch_em.get_data_loader`.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_fundus_avseg_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    loader = torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)
    return loader
Get the Fundus AVSeg dataloader for artery-vein segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train', 'val' or 'test'.
- resize_inputs: Whether to resize the inputs to the patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.