torch_em.data.datasets.medical.papila
The Papila dataset contains annotations for optic disc and optic cup segmentation in Fundus images.
This dataset is located at https://figshare.com/articles/dataset/PAPILA/14798004/2. The dataset is from the publication https://doi.org/10.1038/s41597-022-01388-1. Please cite it if you use this dataset for your research.
1"""The Papila dataset contains annotations for optic disc and optic cup 2segmentation in Fundus images. 3 4This dataset is located at https://figshare.com/articles/dataset/PAPILA/14798004/2. 5The dataset is from the publication https://doi.org/10.1038/s41597-022-01388-1. 6Please cite it if you use this dataset for your research. 7""" 8 9import os 10from glob import glob 11from tqdm import tqdm 12from pathlib import Path 13from typing import Union, Tuple, Literal, List 14 15import numpy as np 16from skimage import draw 17import imageio.v3 as imageio 18 19import torch_em 20 21from .. import util 22 23 24URL = "https://figshare.com/ndownloader/files/35013982" 25CHECKSUM = "15b053dff496bc8e53eb8a8d0707ef73ba3d56c988eea92b65832c9c82852a7d" 26 27 28def get_papila_data(path: Union[os.PathLike, str], download: bool = False) -> str: 29 """Download the Papila dataset. 30 31 Args: 32 path: Filepath to a folder where the data is downloaded for further processing. 33 download: Whether to download the data if it is not present. 34 35 Returns: 36 Filepath where the data is downloaded. 37 """ 38 data_dir = os.path.join(path, "PapilaDB-PAPILA-17f8fa7746adb20275b5b6a0d99dc9dfe3007e9f") 39 if os.path.exists(data_dir): 40 return data_dir 41 42 os.makedirs(path, exist_ok=True) 43 44 zip_path = os.path.join(path, "papila.zip") 45 util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM) 46 util.unzip(zip_path=zip_path, dst=path) 47 48 return data_dir 49 50 51# contour_to_mask() functions taken from https://github.com/matterport/Mask_RCNN 52def contour_to_mask(cont, img_shape): 53 """Return mask given a contour and the shape of image 54 """ 55 c = np.loadtxt(cont) 56 mask = np.zeros(img_shape[:-1], dtype=np.uint8) 57 rr, cc = draw.polygon(c[:, 1], c[:, 0]) 58 mask[rr, cc] = 1 59 return mask 60 61 62def _preprocess_labels(data_dir, image_paths, task, expert_choice): 63 gt_dir = os.path.join(data_dir, "ground_truth") 64 os.makedirs(gt_dir, exist_ok=True) 65 66 patient_ids = [Path(image_path).stem for image_path in image_paths] 67 68 input_shape = (1934, 2576, 3) # shape of the input images 69 gt_paths = [] 70 for patient_id in tqdm(patient_ids, desc=f"Converting contours to segmentations for '{expert_choice}'"): 71 gt_contours = sorted( 72 glob(os.path.join(data_dir, "ExpertsSegmentations", "Contours", f"{patient_id}_{task}_{expert_choice}.txt")) 73 ) 74 75 for gt_contour in gt_contours: 76 tmp_task = Path(gt_contour).stem.split("_")[1] 77 gt_path = os.path.join(gt_dir, f"{patient_id}_{tmp_task}_{expert_choice}.tif") 78 gt_paths.append(gt_path) 79 if os.path.exists(gt_path): 80 continue 81 82 semantic_labels = contour_to_mask(cont=gt_contour, img_shape=input_shape) 83 imageio.imwrite(gt_path, semantic_labels) 84 85 return gt_paths 86 87 88def get_papila_paths( 89 path: Union[os.PathLike, str], 90 split: Literal['train', 'val', 'test'], 91 task: Literal["cup", "disc"] = "disc", 92 expert_choice: Literal["exp1", "exp2"] = "exp1", 93 download: bool = False 94) -> Tuple[List[str], List[str]]: 95 """Get paths to the Papila dataset. 96 97 Args: 98 path: Filepath to a folder where the data is downloaded for further processing. 99 split: The choice of data split. 100 task: The choice of labels for specific task. 101 expert_choice: The choice of expert annotator. 102 download: Whether to download the data if it is not present. 103 104 Returns: 105 List of filepaths for the image data. 106 List of filepaths for the label data. 
107 """ 108 data_dir = get_papila_data(path=path, download=download) 109 110 assert expert_choice in ["exp1", "exp2"], f"'{expert_choice}' is not a valid expert choice." 111 assert task in ["cup", "disc"], f"'{task}' is not a valid task." 112 113 image_paths = sorted(glob(os.path.join(data_dir, "FundusImages", "*.jpg"))) 114 gt_paths = _preprocess_labels(data_dir, image_paths, task, expert_choice) 115 116 if split == "train": 117 image_paths, gt_paths = image_paths[:350], gt_paths[:350] 118 elif split == "val": 119 image_paths, gt_paths = image_paths[350:400], gt_paths[350:400] 120 elif split == "test": 121 image_paths, gt_paths = image_paths[400:], gt_paths[400:] 122 else: 123 raise ValueError(f"'{split}' is not a valid split.") 124 125 assert len(image_paths) == len(gt_paths) and len(image_paths) > 0 126 127 return image_paths, gt_paths 128 129 130def get_papila_dataset( 131 path: Union[os.PathLike, str], 132 patch_shape: Tuple[int, int], 133 split: Literal['train', 'val', 'test'], 134 task: Literal["cup", "disc"] = "disc", 135 expert_choice: Literal["exp1", "exp2"] = "exp1", 136 resize_inputs: bool = False, 137 download: bool = False, 138 **kwargs 139): 140 """Get the Papila dataset for segmentation of optic cup and optic disc in fundus images. 141 142 Args: 143 path: Filepath to a folder where the data is downloaded for further processing. 144 patch_shape: The patch shape to use for training. 145 split: Te choice of data split. 146 task: The choice of labels for specific task. 147 expert_choice: The choice of expert annotator. 148 resize_inputs: Whether to resize the inputs to the expected patch shape. 149 download: Whether to download the data if it is not present. 150 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 151 152 Returns: 153 The segmentation dataset. 154 """ 155 image_paths, gt_paths = get_papila_paths(path, split, task, expert_choice, download) 156 157 if resize_inputs: 158 resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True} 159 kwargs, patch_shape = util.update_kwargs_for_resize_trafo( 160 kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs 161 ) 162 163 dataset = torch_em.default_segmentation_dataset( 164 raw_paths=image_paths, 165 raw_key=None, 166 label_paths=gt_paths, 167 label_key=None, 168 patch_shape=patch_shape, 169 is_seg_dataset=False, 170 **kwargs 171 ) 172 173 return dataset 174 175 176def get_papila_loader( 177 path: Union[os.PathLike, str], 178 batch_size: int, 179 patch_shape: Tuple[int, int], 180 split: Literal['train', 'val', 'test'], 181 task: Literal["cup", "disc"] = "disc", 182 expert_choice: Literal["exp1", "exp2"] = "exp1", 183 resize_inputs: bool = False, 184 download: bool = False, 185 **kwargs 186): 187 """Get the Papila dataloader for segmentation of optic cup and optic disc in fundus images. 188 189 Args: 190 path: Filepath to a folder where the data is downloaded for further processing. 191 batch_size: The batch size for training. 192 patch_shape: The patch shape to use for training. 193 split: The choice of data split. 194 task: The choice of labels for specific task. 195 expert_choice: The choice of expert annotator. 196 resize_inputs: Whether to resize the inputs to the expected patch shape. 197 download: Whether to download the data if it is not present. 198 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 199 200 Returns: 201 The DataLoader. 
202 """ 203 ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) 204 dataset = get_papila_dataset(path, patch_shape, split, task, expert_choice, resize_inputs, download, **ds_kwargs) 205 return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def get_papila_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the Papila dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    data_dir = os.path.join(path, "PapilaDB-PAPILA-17f8fa7746adb20275b5b6a0d99dc9dfe3007e9f")
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "papila.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir
Download the Papila dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
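A minimal download sketch; the cache folder ./data/papila is an assumed location, while the returned path is the extracted repository folder hard-coded in the function:

from torch_em.data.datasets.medical.papila import get_papila_data

# Assumed local cache folder; it is created if it does not exist.
data_dir = get_papila_data(path="./data/papila", download=True)
print(data_dir)  # ends with PapilaDB-PAPILA-17f8fa7746adb20275b5b6a0d99dc9dfe3007e9f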
def contour_to_mask(cont, img_shape):
    """Return the mask given a contour file and the shape of the image."""
    c = np.loadtxt(cont)
    mask = np.zeros(img_shape[:-1], dtype=np.uint8)
    rr, cc = draw.polygon(c[:, 1], c[:, 0])
    mask[rr, cc] = 1
    return mask
Return the mask given a contour file and the shape of the image.
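The contour file is read with np.loadtxt and interpreted as (x, y) vertex pairs, which are rasterized into a binary mask with skimage.draw.polygon; the channel axis of img_shape is dropped. A small self-contained sketch with a synthetic contour file (file name and shape are made up for illustration):

import numpy as np
from torch_em.data.datasets.medical.papila import contour_to_mask

# Synthetic square contour, stored as (x, y) vertices in a text file.
vertices = np.array([[10.0, 10.0], [40.0, 10.0], [40.0, 30.0], [10.0, 30.0]])
np.savetxt("square_contour.txt", vertices)

# img_shape mimics an RGB image; only the spatial dimensions are used for the mask.
mask = contour_to_mask(cont="square_contour.txt", img_shape=(64, 64, 3))
print(mask.shape, int(mask.sum()))  # (64, 64) and the number of foreground pixels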
def get_papila_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'val', 'test'],
    task: Literal["cup", "disc"] = "disc",
    expert_choice: Literal["exp1", "exp2"] = "exp1",
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the Papila dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        task: The choice of labels for the specific task.
        expert_choice: The choice of expert annotator.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_papila_data(path=path, download=download)

    assert expert_choice in ["exp1", "exp2"], f"'{expert_choice}' is not a valid expert choice."
    assert task in ["cup", "disc"], f"'{task}' is not a valid task."

    image_paths = sorted(glob(os.path.join(data_dir, "FundusImages", "*.jpg")))
    gt_paths = _preprocess_labels(data_dir, image_paths, task, expert_choice)

    if split == "train":
        image_paths, gt_paths = image_paths[:350], gt_paths[:350]
    elif split == "val":
        image_paths, gt_paths = image_paths[350:400], gt_paths[350:400]
    elif split == "test":
        image_paths, gt_paths = image_paths[400:], gt_paths[400:]
    else:
        raise ValueError(f"'{split}' is not a valid split.")

    assert len(image_paths) == len(gt_paths) and len(image_paths) > 0

    return image_paths, gt_paths
Get paths to the Papila dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The choice of data split.
- task: The choice of labels for the specific task.
- expert_choice: The choice of expert annotator.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data.
List of filepaths for the label data.
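For example, assuming a local folder ./data/papila, the validation paths for the optic cup labels of the first expert could be listed like this; the first call also converts the expert contours into .tif label masks:

from torch_em.data.datasets.medical.papila import get_papila_paths

image_paths, gt_paths = get_papila_paths(
    path="./data/papila", split="val", task="cup", expert_choice="exp1", download=True
)
print(len(image_paths))             # the 'val' split covers image indices 350:400
print(image_paths[0], gt_paths[0])  # matching image and label filepaths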
def get_papila_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    task: Literal["cup", "disc"] = "disc",
    expert_choice: Literal["exp1", "exp2"] = "exp1",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Get the Papila dataset for segmentation of optic cup and optic disc in fundus images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        task: The choice of labels for the specific task.
        expert_choice: The choice of expert annotator.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_papila_paths(path, split, task, expert_choice, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )

    return dataset
Get the Papila dataset for segmentation of optic cup and optic disc in fundus images.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- task: The choice of labels for the specific task.
- expert_choice: The choice of expert annotator.
- resize_inputs: Whether to resize the inputs to the expected patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
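A usage sketch with assumed values (./data/papila is a placeholder path). With resize_inputs=True, images and labels are resized to the patch shape via the transform set up by util.update_kwargs_for_resize_trafo:

from torch_em.data.datasets.medical.papila import get_papila_dataset

dataset = get_papila_dataset(
    path="./data/papila",
    patch_shape=(512, 512),
    split="train",
    task="disc",
    expert_choice="exp2",
    resize_inputs=True,
    download=True,
)
image, label = dataset[0]  # a single patch and its optic disc mask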
def get_papila_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    task: Literal["cup", "disc"] = "disc",
    expert_choice: Literal["exp1", "exp2"] = "exp1",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Get the Papila dataloader for segmentation of optic cup and optic disc in fundus images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        task: The choice of labels for the specific task.
        expert_choice: The choice of expert annotator.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_papila_dataset(path, patch_shape, split, task, expert_choice, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
Get the Papila dataloader for segmentation of optic cup and optic disc in fundus images.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- task: The choice of labels for the specific task.
- expert_choice: The choice of expert annotator.
- resize_inputs: Whether to resize the inputs to the expected patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.
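Keyword arguments are split internally via util.split_kwargs: anything accepted by torch_em.default_segmentation_dataset configures the dataset, and the rest (e.g. num_workers, shuffle) is forwarded to the PyTorch DataLoader. A sketch under the same assumptions as above:

from torch_em.data.datasets.medical.papila import get_papila_loader

loader = get_papila_loader(
    path="./data/papila",
    batch_size=4,
    patch_shape=(512, 512),
    split="test",
    task="cup",
    expert_choice="exp1",
    resize_inputs=True,
    download=True,
    num_workers=4,   # forwarded to the DataLoader
    shuffle=False,   # forwarded to the DataLoader
)
images, labels = next(iter(loader))
print(images.shape, labels.shape)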