torch_em.data.datasets.medical.papila

  1import os
  2from glob import glob
  3from tqdm import tqdm
  4from pathlib import Path
  5from typing import Union, Tuple
  6
  7import numpy as np
  8from skimage import draw
  9import imageio.v3 as imageio
 10
 11import torch_em
 12from torch_em.transform.generic import ResizeInputs
 13
 14from .. import util
 15from ... import ImageCollectionDataset
 16
 17
 18URL = "https://figshare.com/ndownloader/files/35013982"  # download location of the dataset archive, passed to `util.download_source`
 19CHECKSUM = "15b053dff496bc8e53eb8a8d0707ef73ba3d56c988eea92b65832c9c82852a7d"  # expected checksum of the archive; presumably SHA-256 (64 hex digits) - TODO confirm
 20
 21
 22def get_papila_data(path, download):
 23    os.makedirs(path, exist_ok=True)
 24
 25    data_dir = os.path.join(path, "PapilaDB-PAPILA-17f8fa7746adb20275b5b6a0d99dc9dfe3007e9f")
 26    if os.path.exists(data_dir):
 27        return data_dir
 28
 29    zip_path = os.path.join(path, "papila.zip")
 30    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 31    util.unzip(zip_path=zip_path, dst=path)
 32
 33    return data_dir
 34
 35
 36# contour_to_mask() functions taken from https://github.com/matterport/Mask_RCNN
 37def contour_to_mask(cont, img_shape):
 38    """Return mask given a contour and the shape of image
 39    """
 40    c = np.loadtxt(cont)
 41    mask = np.zeros(img_shape[:-1], dtype=np.uint8)
 42    rr, cc = draw.polygon(c[:, 1], c[:, 0])
 43    mask[rr, cc] = 1
 44    return mask
 45
 46
 47def _get_papila_paths(path, task, expert_choice, download):
 48    data_dir = get_papila_data(path=path, download=download)
 49
 50    image_paths = sorted(glob(os.path.join(data_dir, "FundusImages", "*.jpg")))
 51
 52    gt_dir = os.path.join(data_dir, "ground_truth")
 53    if os.path.exists(gt_dir):
 54        gt_paths = sorted(glob(os.path.join(gt_dir, f"*_{task}.tif")))
 55        return image_paths, gt_paths
 56
 57    os.makedirs(gt_dir, exist_ok=True)
 58
 59    if task is None:  # we get the binary segmentations for both disc and cup
 60        task = "*"
 61
 62    patient_ids = [Path(image_path).stem for image_path in image_paths]
 63
 64    input_shape = (1934, 2576, 3)  # shape of the input images
 65    gt_paths = []
 66    for patient_id in tqdm(patient_ids, desc=f"Converting contours to segmentations for '{expert_choice}'"):
 67        gt_contours = sorted(
 68            glob(os.path.join(data_dir, "ExpertsSegmentations", "Contours", f"{patient_id}_{task}_{expert_choice}.txt"))
 69        )
 70
 71        assert len(gt_contours) == (4 if task is None else 2)
 72
 73        for gt_contour in gt_contours:
 74            tmp_task = Path(gt_contour).stem.split("_")[1]
 75            gt_path = os.path.join(gt_dir, f"{patient_id}_{tmp_task}.tif")
 76            gt_paths.append(gt_path)
 77            if os.path.exists(gt_path):
 78                continue
 79
 80            semantic_labels = contour_to_mask(cont=gt_contour, img_shape=input_shape)
 81            imageio.imwrite(gt_path, semantic_labels)
 82
 83    return image_paths, gt_paths
 84
 85
 86def get_papila_dataset(
 87    path: Union[os.PathLike, str],
 88    patch_shape: Tuple[int, int],
 89    task: str = "disc",
 90    expert_choice: str = "exp1",
 91    resize_inputs: bool = False,
 92    download: bool = False,
 93    **kwargs
 94):
 95    """Dataset for segmentation of optic cup and optic disc in fundus images.
 96
 97    The database is located at https://figshare.com/articles/dataset/PAPILA/14798004/2
 98
 99    The dataset is from Kovalyk et al. - https://doi.org/10.1038/s41597-022-01388-1.
100    Please cite it if you use this dataset for a publication.
101    """
102    assert expert_choice in ["exp1", "exp2"], f"'{expert_choice}' is not a valid expert choice."
103
104    if task is not None:
105        assert task in ["cup", "disc"], f"'{task}' is not a valid task."
106
107    image_paths, gt_paths = _get_papila_paths(path=path, task=task, expert_choice=expert_choice, download=download)
108
109    if resize_inputs:
110        raw_trafo = ResizeInputs(target_shape=patch_shape, is_rgb=True)
111        label_trafo = ResizeInputs(target_shape=patch_shape, is_label=True)
112        patch_shape = None
113    else:
114        patch_shape = patch_shape
115        raw_trafo, label_trafo = None, None
116
117    dataset = ImageCollectionDataset(
118        raw_image_paths=image_paths,
119        label_image_paths=gt_paths,
120        patch_shape=patch_shape,
121        raw_transform=raw_trafo,
122        label_transform=label_trafo,
123        **kwargs
124    )
125
126    return dataset
127
128
def get_papila_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    task: str = "disc",
    expert_choice: str = "exp1",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Dataloader for segmentation of optic cup and optic disc in fundus images. See `get_papila_dataset` for details.

    The extra keyword arguments are divided by `util.split_kwargs` into the ones for
    the dataset and the ones for `torch_em.get_data_loader`.
    """
    dataset_kwargs, dl_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    papila_ds = get_papila_dataset(
        path=path,
        patch_shape=patch_shape,
        task=task,
        expert_choice=expert_choice,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(dataset=papila_ds, batch_size=batch_size, **dl_kwargs)
# Download location of the dataset archive; passed to `util.download_source` by `get_papila_data`.
URL = 'https://figshare.com/ndownloader/files/35013982'
# Expected checksum of that archive; presumably SHA-256 (64 hex digits) - TODO confirm.
CHECKSUM = '15b053dff496bc8e53eb8a8d0707ef73ba3d56c988eea92b65832c9c82852a7d'
def get_papila_data(path, download):
def get_papila_data(path, download):
    """Make sure the PAPILA data is available below `path` and return its root folder.

    The folder `path` is created if needed. If the extracted dataset folder is
    not present yet, the archive is fetched to 'papila.zip' inside `path` via
    `util.download_source` (which receives the `download` flag) and unpacked
    into `path` with `util.unzip`.

    Returns the path of the extracted dataset folder.
    """
    os.makedirs(path, exist_ok=True)

    extracted_root = os.path.join(path, "PapilaDB-PAPILA-17f8fa7746adb20275b5b6a0d99dc9dfe3007e9f")

    # Only touch the network / archive when the extracted folder is missing.
    if not os.path.exists(extracted_root):
        archive = os.path.join(path, "papila.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return extracted_root
def contour_to_mask(cont, img_shape):
def contour_to_mask(cont, img_shape):
    """Return the binary mask enclosed by a contour.

    Args:
        cont: The contour file; it is read with `np.loadtxt`. Column 0 is used as the
            column (x) coordinate and column 1 as the row (y) coordinate of each vertex.
        img_shape: The shape of the corresponding image. The last entry is dropped to
            obtain the mask shape (e.g. the channel axis of a (height, width, channels) shape).

    Returns:
        A uint8 array with value 1 inside the contour polygon and 0 elsewhere.
    """
    c = np.loadtxt(cont)
    mask = np.zeros(img_shape[:-1], dtype=np.uint8)
    # Passing the shape clips the polygon to the image extent, so that vertices outside
    # of the frame cannot trigger an IndexError or wrap around via negative indices.
    rr, cc = draw.polygon(c[:, 1], c[:, 0], shape=mask.shape)
    mask[rr, cc] = 1
    return mask

Return the mask for a given contour and the shape of the image.

def get_papila_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], task: str = 'disc', expert_choice: str = 'exp1', resize_inputs: bool = False, download: bool = False, **kwargs):
 87def get_papila_dataset(
 88    path: Union[os.PathLike, str],
 89    patch_shape: Tuple[int, int],
 90    task: str = "disc",
 91    expert_choice: str = "exp1",
 92    resize_inputs: bool = False,
 93    download: bool = False,
 94    **kwargs
 95):
 96    """Dataset for segmentation of optic cup and optic disc in fundus images.
 97
 98    The database is located at https://figshare.com/articles/dataset/PAPILA/14798004/2
 99
100    The dataset is from Kovalyk et al. - https://doi.org/10.1038/s41597-022-01388-1.
101    Please cite it if you use this dataset for a publication.
102    """
103    assert expert_choice in ["exp1", "exp2"], f"'{expert_choice}' is not a valid expert choice."
104
105    if task is not None:
106        assert task in ["cup", "disc"], f"'{task}' is not a valid task."
107
108    image_paths, gt_paths = _get_papila_paths(path=path, task=task, expert_choice=expert_choice, download=download)
109
110    if resize_inputs:
111        raw_trafo = ResizeInputs(target_shape=patch_shape, is_rgb=True)
112        label_trafo = ResizeInputs(target_shape=patch_shape, is_label=True)
113        patch_shape = None
114    else:
115        patch_shape = patch_shape
116        raw_trafo, label_trafo = None, None
117
118    dataset = ImageCollectionDataset(
119        raw_image_paths=image_paths,
120        label_image_paths=gt_paths,
121        patch_shape=patch_shape,
122        raw_transform=raw_trafo,
123        label_transform=label_trafo,
124        **kwargs
125    )
126
127    return dataset

Dataset for segmentation of optic cup and optic disc in fundus images.

The database is located at https://figshare.com/articles/dataset/PAPILA/14798004/2

The dataset is from Kovalyk et al. - https://doi.org/10.1038/s41597-022-01388-1. Please cite it if you use this dataset for a publication.

def get_papila_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, task: str = 'disc', expert_choice: str = 'exp1', resize_inputs: bool = False, download: bool = False, **kwargs):
def get_papila_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    task: str = "disc",
    expert_choice: str = "exp1",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Dataloader for segmentation of optic cup and optic disc in fundus images. See `get_papila_dataset` for details.

    The extra keyword arguments are divided by `util.split_kwargs` into the ones for
    the dataset and the ones for `torch_em.get_data_loader`.
    """
    dataset_kwargs, dl_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    papila_ds = get_papila_dataset(
        path=path,
        patch_shape=patch_shape,
        task=task,
        expert_choice=expert_choice,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(dataset=papila_ds, batch_size=batch_size, **dl_kwargs)

Dataloader for segmentation of optic cup and optic disc in fundus images. See get_papila_dataset for details.