torch_em.data.datasets.medical.papila
import os
from glob import glob
from tqdm import tqdm
from pathlib import Path
from typing import Union, Tuple

import numpy as np
from skimage import draw
import imageio.v3 as imageio

import torch_em
from torch_em.transform.generic import ResizeInputs

from .. import util
from ... import ImageCollectionDataset


URL = "https://figshare.com/ndownloader/files/35013982"
CHECKSUM = "15b053dff496bc8e53eb8a8d0707ef73ba3d56c988eea92b65832c9c82852a7d"


def get_papila_data(path, download):
    """Make sure the PAPILA data is available below `path` and return its folder.

    Args:
        path: Folder in which the archive is stored and extracted. It is created if missing.
        download: Forwarded to `util.download_source`.

    Returns:
        The path of the extracted dataset folder inside `path`.
    """
    os.makedirs(path, exist_ok=True)

    data_dir = os.path.join(path, "PapilaDB-PAPILA-17f8fa7746adb20275b5b6a0d99dc9dfe3007e9f")
    if os.path.exists(data_dir):
        # Already extracted: nothing to download.
        return data_dir

    zip_path = os.path.join(path, "papila.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


# contour_to_mask() functions taken from https://github.com/matterport/Mask_RCNN
def contour_to_mask(cont, img_shape):
    """Return a binary mask for the contour stored in a text file.

    Args:
        cont: The contour file, read with `np.loadtxt`. Column 1 is used as row
            coordinates and column 0 as column coordinates of the polygon.
        img_shape: Shape of the image; the last axis is dropped to get the mask shape.

    Returns:
        A `uint8` array of shape `img_shape[:-1]` that is 1 inside the polygon and 0 elsewhere.
    """
    c = np.loadtxt(cont)
    mask = np.zeros(img_shape[:-1], dtype=np.uint8)
    # Passing the shape clips the polygon to the image, so contour points outside
    # of it neither raise an IndexError nor wrap around through negative indices.
    rr, cc = draw.polygon(c[:, 1], c[:, 0], shape=mask.shape)
    mask[rr, cc] = 1
    return mask


def _get_papila_paths(path, task, expert_choice, download):
    """Collect the image paths and create / collect the matching label images.

    The expert contours are rasterized once and stored as tif files in the
    "ground_truth" folder of the dataset. The file names contain the expert, so
    masks of different experts never overwrite or shadow each other.

    Args:
        path: Folder that contains (or will contain) the dataset.
        task: "cup", "disc" or None. None selects the contours of all tasks.
        expert_choice: Name of the expert annotation, e.g. "exp1".
        download: Forwarded to `get_papila_data`.

    Returns:
        The sorted image paths and the list of label paths.
    """
    data_dir = get_papila_data(path=path, download=download)

    image_paths = sorted(glob(os.path.join(data_dir, "FundusImages", "*.jpg")))

    gt_dir = os.path.join(data_dir, "ground_truth")
    os.makedirs(gt_dir, exist_ok=True)

    # Decide the expected number of contours BEFORE replacing `task` by a wildcard.
    # A fully specified "<patient>_<task>_<expert>.txt" pattern matches at most one
    # file; the wildcard matches one file per task (cup and disc).
    n_expected = 2 if task is None else 1
    task_pattern = "*" if task is None else task

    contour_dir = os.path.join(data_dir, "ExpertsSegmentations", "Contours")
    input_shape = (1934, 2576, 3)  # shape of the input images

    # NOTE(review): for task=None two label paths are returned per image, so the
    # two returned lists differ in length - confirm that the consumer expects this.
    gt_paths = []
    for image_path in tqdm(image_paths, desc=f"Converting contours to segmentations for '{expert_choice}'"):
        patient_id = Path(image_path).stem
        gt_contours = sorted(
            glob(os.path.join(contour_dir, f"{patient_id}_{task_pattern}_{expert_choice}.txt"))
        )

        assert len(gt_contours) == n_expected, (
            f"Expected {n_expected} contour file(s) for '{patient_id}' and '{expert_choice}', "
            f"found {len(gt_contours)}."
        )

        for gt_contour in gt_contours:
            # The contour files are named "<patient>_<task>_<expert>.txt".
            tmp_task = Path(gt_contour).stem.split("_")[1]
            gt_path = os.path.join(gt_dir, f"{patient_id}_{tmp_task}_{expert_choice}.tif")
            gt_paths.append(gt_path)
            # Checking every file (instead of only the folder) lets an interrupted
            # conversion resume on the next call.
            if os.path.exists(gt_path):
                continue

            semantic_labels = contour_to_mask(cont=gt_contour, img_shape=input_shape)
            imageio.imwrite(gt_path, semantic_labels)

    return image_paths, gt_paths


def get_papila_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    task: str = "disc",
    expert_choice: str = "exp1",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Dataset for segmentation of optic cup and optic disc in fundus images.

    The database is located at https://figshare.com/articles/dataset/PAPILA/14798004/2

    The dataset is from Kovalyk et al. - https://doi.org/10.1038/s41597-022-01388-1.
    Please cite it if you use this dataset for a publication.

    Args:
        path: Folder that contains (or will contain) the dataset.
        patch_shape: Patch shape for the dataset. If `resize_inputs` is set it is used
            as target shape of the resize transforms instead.
        task: "cup", "disc" or None.
        expert_choice: "exp1" or "exp2".
        resize_inputs: Whether to resize images and labels to `patch_shape`.
        download: Forwarded to the download helper.
        kwargs: Additional keyword arguments for `ImageCollectionDataset`.

    Returns:
        The `ImageCollectionDataset` for the selected task and expert.
    """
    assert expert_choice in ["exp1", "exp2"], f"'{expert_choice}' is not a valid expert choice."

    if task is not None:
        assert task in ["cup", "disc"], f"'{task}' is not a valid task."

    image_paths, gt_paths = _get_papila_paths(path=path, task=task, expert_choice=expert_choice, download=download)

    if resize_inputs:
        raw_trafo = ResizeInputs(target_shape=patch_shape, is_rgb=True)
        label_trafo = ResizeInputs(target_shape=patch_shape, is_label=True)
        # The transforms take over the shape, so no patch shape is passed on.
        patch_shape = None
    else:
        raw_trafo, label_trafo = None, None

    dataset = ImageCollectionDataset(
        raw_image_paths=image_paths,
        label_image_paths=gt_paths,
        patch_shape=patch_shape,
        raw_transform=raw_trafo,
        label_transform=label_trafo,
        **kwargs
    )

    return dataset


def get_papila_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    task: str = "disc",
    expert_choice: str = "exp1",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Dataloader for segmentation of optic cup and optic disc in fundus images. See `get_papila_dataset` for details.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_papila_dataset(
        path=path,
        patch_shape=patch_shape,
        task=task,
        expert_choice=expert_choice,
        resize_inputs=resize_inputs,
        download=download,
        **ds_kwargs
    )
    loader = torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
    return loader
URL =
'https://figshare.com/ndownloader/files/35013982'
CHECKSUM =
'15b053dff496bc8e53eb8a8d0707ef73ba3d56c988eea92b65832c9c82852a7d'
def
get_papila_data(path, download):
def get_papila_data(path, download):
    """Make sure the PAPILA data is available below `path` and return its folder.

    `path` is created if it does not exist. When the extracted dataset folder is
    missing, the archive is fetched with `util.download_source` (which receives
    the `download` flag) and unpacked into `path` with `util.unzip`.

    Returns:
        The path of the extracted dataset folder inside `path`.
    """
    os.makedirs(path, exist_ok=True)

    extracted_dir = os.path.join(path, "PapilaDB-PAPILA-17f8fa7746adb20275b5b6a0d99dc9dfe3007e9f")
    if not os.path.exists(extracted_dir):
        archive = os.path.join(path, "papila.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return extracted_dir
def
contour_to_mask(cont, img_shape):
def contour_to_mask(cont, img_shape):
    """Return a binary mask for the contour stored in a text file.

    Args:
        cont: The contour file, read with `np.loadtxt`. Column 1 is used as row
            coordinates and column 0 as column coordinates of the polygon.
        img_shape: Shape of the image; the last axis is dropped to get the mask shape.

    Returns:
        A `uint8` array of shape `img_shape[:-1]` that is 1 inside the polygon and 0 elsewhere.
    """
    c = np.loadtxt(cont)
    mask = np.zeros(img_shape[:-1], dtype=np.uint8)
    # Passing the shape clips the polygon to the image, so contour points outside
    # of it neither raise an IndexError nor wrap around through negative indices.
    rr, cc = draw.polygon(c[:, 1], c[:, 0], shape=mask.shape)
    mask[rr, cc] = 1
    return mask
Return a binary mask given the path to a contour file and the shape of the image.
def
get_papila_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], task: str = 'disc', expert_choice: str = 'exp1', resize_inputs: bool = False, download: bool = False, **kwargs):
def get_papila_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    task: str = "disc",
    expert_choice: str = "exp1",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Dataset for segmentation of optic cup and optic disc in fundus images.

    The database is located at https://figshare.com/articles/dataset/PAPILA/14798004/2

    The dataset is from Kovalyk et al. - https://doi.org/10.1038/s41597-022-01388-1.
    Please cite it if you use this dataset for a publication.

    `expert_choice` must be "exp1" or "exp2" and `task` must be "cup", "disc" or None.
    With `resize_inputs` the images and labels get resize transforms that target
    `patch_shape`, and the dataset itself is created without a patch shape.
    Additional keyword arguments are passed to `ImageCollectionDataset`.
    """
    assert expert_choice in ["exp1", "exp2"], f"'{expert_choice}' is not a valid expert choice."
    assert task is None or task in ["cup", "disc"], f"'{task}' is not a valid task."

    images, labels = _get_papila_paths(path=path, task=task, expert_choice=expert_choice, download=download)

    # Without resizing no transforms are used and the patch shape is passed on as is.
    image_transform = label_transform = None
    if resize_inputs:
        image_transform = ResizeInputs(target_shape=patch_shape, is_rgb=True)
        label_transform = ResizeInputs(target_shape=patch_shape, is_label=True)
        patch_shape = None

    return ImageCollectionDataset(
        raw_image_paths=images,
        label_image_paths=labels,
        patch_shape=patch_shape,
        raw_transform=image_transform,
        label_transform=label_transform,
        **kwargs
    )
Dataset for segmentation of optic cup and optic disc in fundus images.
The database is located at https://figshare.com/articles/dataset/PAPILA/14798004/2
The dataset is from Kovalyk et al. - https://doi.org/10.1038/s41597-022-01388-1. Please cite it if you use this dataset for a publication.
def
get_papila_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, task: str = 'disc', expert_choice: str = 'exp1', resize_inputs: bool = False, download: bool = False, **kwargs):
def get_papila_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    task: str = "disc",
    expert_choice: str = "exp1",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Dataloader for segmentation of optic cup and optic disc in fundus images. See `get_papila_dataset` for details.

    The extra keyword arguments are divided by `util.split_kwargs` into the ones
    handed to `get_papila_dataset` and the ones handed to `torch_em.get_data_loader`.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset_args = dict(
        path=path,
        patch_shape=patch_shape,
        task=task,
        expert_choice=expert_choice,
        resize_inputs=resize_inputs,
        download=download,
    )
    papila_dataset = get_papila_dataset(**dataset_args, **dataset_kwargs)

    return torch_em.get_data_loader(dataset=papila_dataset, batch_size=batch_size, **dataloader_kwargs)
Dataloader for segmentation of optic cup and optic disc in fundus images. See `get_papila_dataset`
for details.