torch_em.data.datasets.medical.papila

The Papila dataset contains annotations for optic disc and optic cup segmentation in Fundus images.

This dataset is located at https://figshare.com/articles/dataset/PAPILA/14798004/2. The dataset is from the publication https://doi.org/10.1038/s41597-022-01388-1. Please cite it if you use this dataset for your research.

  1"""The Papila dataset contains annotations for optic disc and optic cup
  2segmentation in Fundus images.
  3
  4This dataset is located at https://figshare.com/articles/dataset/PAPILA/14798004/2.
  5The dataset is from the publication https://doi.org/10.1038/s41597-022-01388-1.
  6Please cite it if you use this dataset for your research.
  7"""
  8
  9import os
 10from glob import glob
 11from tqdm import tqdm
 12from pathlib import Path
 13from typing import Union, Tuple, Literal, List
 14
 15import numpy as np
 16from skimage import draw
 17import imageio.v3 as imageio
 18
 19import torch_em
 20
 21from .. import util
 22
 23
 24URL = "https://figshare.com/ndownloader/files/35013982"
 25CHECKSUM = "15b053dff496bc8e53eb8a8d0707ef73ba3d56c988eea92b65832c9c82852a7d"
 26
 27
 28def get_papila_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 29    """Download the Papila dataset.
 30
 31    Args:
 32        path: Filepath to a folder where the data is downloaded for further processing.
 33        download: Whether to download the data if it is not present.
 34
 35    Returns:
 36        Filepath where the data is downloaded.
 37    """
 38    data_dir = os.path.join(path, "PapilaDB-PAPILA-17f8fa7746adb20275b5b6a0d99dc9dfe3007e9f")
 39    if os.path.exists(data_dir):
 40        return data_dir
 41
 42    os.makedirs(path, exist_ok=True)
 43
 44    zip_path = os.path.join(path, "papila.zip")
 45    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 46    util.unzip(zip_path=zip_path, dst=path)
 47
 48    return data_dir
 49
 50
 51# contour_to_mask() functions taken from https://github.com/matterport/Mask_RCNN
 52def contour_to_mask(cont, img_shape):
 53    """Return mask given a contour and the shape of image
 54    """
 55    c = np.loadtxt(cont)
 56    mask = np.zeros(img_shape[:-1], dtype=np.uint8)
 57    rr, cc = draw.polygon(c[:, 1], c[:, 0])
 58    mask[rr, cc] = 1
 59    return mask
 60
 61
 62def _preprocess_labels(data_dir, image_paths, task, expert_choice):
 63    gt_dir = os.path.join(data_dir, "ground_truth")
 64    os.makedirs(gt_dir, exist_ok=True)
 65
 66    patient_ids = [Path(image_path).stem for image_path in image_paths]
 67
 68    input_shape = (1934, 2576, 3)  # shape of the input images
 69    gt_paths = []
 70    for patient_id in tqdm(patient_ids, desc=f"Converting contours to segmentations for '{expert_choice}'"):
 71        gt_contours = sorted(
 72            glob(os.path.join(data_dir, "ExpertsSegmentations", "Contours", f"{patient_id}_{task}_{expert_choice}.txt"))
 73        )
 74
 75        for gt_contour in gt_contours:
 76            tmp_task = Path(gt_contour).stem.split("_")[1]
 77            gt_path = os.path.join(gt_dir, f"{patient_id}_{tmp_task}_{expert_choice}.tif")
 78            gt_paths.append(gt_path)
 79            if os.path.exists(gt_path):
 80                continue
 81
 82            semantic_labels = contour_to_mask(cont=gt_contour, img_shape=input_shape)
 83            imageio.imwrite(gt_path, semantic_labels)
 84
 85    return gt_paths
 86
 87
 88def get_papila_paths(
 89    path: Union[os.PathLike, str],
 90    split: Literal['train', 'val', 'test'],
 91    task: Literal["cup", "disc"] = "disc",
 92    expert_choice: Literal["exp1", "exp2"] = "exp1",
 93    download: bool = False
 94) -> Tuple[List[str], List[str]]:
 95    """Get paths to the Papila dataset.
 96
 97    Args:
 98        path: Filepath to a folder where the data is downloaded for further processing.
 99        split: The choice of data split.
100        task: The choice of labels for specific task.
101        expert_choice: The choice of expert annotator.
102        download: Whether to download the data if it is not present.
103
104    Returns:
105        List of filepaths for the image data.
106        List of filepaths for the label data.
107    """
108    data_dir = get_papila_data(path=path, download=download)
109
110    assert expert_choice in ["exp1", "exp2"], f"'{expert_choice}' is not a valid expert choice."
111    assert task in ["cup", "disc"], f"'{task}' is not a valid task."
112
113    image_paths = sorted(glob(os.path.join(data_dir, "FundusImages", "*.jpg")))
114    gt_paths = _preprocess_labels(data_dir, image_paths, task, expert_choice)
115
116    if split == "train":
117        image_paths, gt_paths = image_paths[:350], gt_paths[:350]
118    elif split == "val":
119        image_paths, gt_paths = image_paths[350:400], gt_paths[350:400]
120    elif split == "test":
121        image_paths, gt_paths = image_paths[400:], gt_paths[400:]
122    else:
123        raise ValueError(f"'{split}' is not a valid split.")
124
125    assert len(image_paths) == len(gt_paths) and len(image_paths) > 0
126
127    return image_paths, gt_paths
128
129
def get_papila_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    task: Literal["cup", "disc"] = "disc",
    expert_choice: Literal["exp1", "exp2"] = "exp1",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Get the Papila dataset for segmentation of optic cup and optic disc in fundus images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        task: The choice of labels for specific task.
        expert_choice: The choice of expert annotator.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_papila_paths(
        path=path, split=split, task=task, expert_choice=expert_choice, download=download
    )

    if resize_inputs:
        # The resize settings are built from the requested patch shape;
        # the images are flagged as RGB.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
174
175
def get_papila_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    task: Literal["cup", "disc"] = "disc",
    expert_choice: Literal["exp1", "exp2"] = "exp1",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Get the Papila dataloader for segmentation of optic cup and optic disc in fundus images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        task: The choice of labels for specific task.
        expert_choice: The choice of expert annotator.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining
    # ones, which are forwarded to the data loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_papila_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        task=task,
        expert_choice=expert_choice,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)
# Download link of the Papila archive; passed to `util.download_source` as `url`.
URL = 'https://figshare.com/ndownloader/files/35013982'
# Expected checksum of the downloaded archive; passed to `util.download_source` as `checksum`.
# NOTE(review): 64 hex digits, presumably a SHA-256 digest - confirm against `util.download_source`.
CHECKSUM = '15b053dff496bc8e53eb8a8d0707ef73ba3d56c988eea92b65832c9c82852a7d'
def get_papila_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_papila_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the Papila dataset is available under `path`.

    If the extracted dataset folder already exists below `path`, it is
    returned right away. Otherwise `path` is created, the archive location
    `<path>/papila.zip` is handed to `util.download_source` and the archive
    is unpacked into `path` with `util.unzip`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the extracted dataset folder.
    """
    extracted_dir = os.path.join(path, "PapilaDB-PAPILA-17f8fa7746adb20275b5b6a0d99dc9dfe3007e9f")

    # Only touch the archive when the extracted folder is missing.
    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)

        archive = os.path.join(path, "papila.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return extracted_dir

Download the Papila dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def contour_to_mask(cont, img_shape):
def contour_to_mask(cont, img_shape):
    """Rasterize a contour file into a binary mask.

    Args:
        cont: Text file readable by `np.loadtxt` with one polygon vertex per row.
            Column 0 is used as the column (x) coordinate and column 1 as the
            row (y) coordinate.
        img_shape: Shape of the corresponding image. The last axis is dropped,
            i.e. it is assumed to be the channel axis.

    Returns:
        A `uint8` array of shape `img_shape[:-1]` which is 1 inside the polygon and 0 elsewhere.
    """
    contour = np.loadtxt(cont)
    mask = np.zeros(img_shape[:-1], dtype=np.uint8)

    # Restrict the polygon to the mask extent. Without `shape`, a vertex outside
    # the image raises an IndexError, and a negative coordinate silently wraps
    # around and paints pixels on the opposite border.
    rows, cols = draw.polygon(contour[:, 1], contour[:, 0], shape=mask.shape)
    mask[rows, cols] = 1
    return mask

Return mask given a contour and the shape of image

def get_papila_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], task: Literal['cup', 'disc'] = 'disc', expert_choice: Literal['exp1', 'exp2'] = 'exp1', download: bool = False) -> Tuple[List[str], List[str]]:
 89def get_papila_paths(
 90    path: Union[os.PathLike, str],
 91    split: Literal['train', 'val', 'test'],
 92    task: Literal["cup", "disc"] = "disc",
 93    expert_choice: Literal["exp1", "exp2"] = "exp1",
 94    download: bool = False
 95) -> Tuple[List[str], List[str]]:
 96    """Get paths to the Papila dataset.
 97
 98    Args:
 99        path: Filepath to a folder where the data is downloaded for further processing.
100        split: The choice of data split.
101        task: The choice of labels for specific task.
102        expert_choice: The choice of expert annotator.
103        download: Whether to download the data if it is not present.
104
105    Returns:
106        List of filepaths for the image data.
107        List of filepaths for the label data.
108    """
109    data_dir = get_papila_data(path=path, download=download)
110
111    assert expert_choice in ["exp1", "exp2"], f"'{expert_choice}' is not a valid expert choice."
112    assert task in ["cup", "disc"], f"'{task}' is not a valid task."
113
114    image_paths = sorted(glob(os.path.join(data_dir, "FundusImages", "*.jpg")))
115    gt_paths = _preprocess_labels(data_dir, image_paths, task, expert_choice)
116
117    if split == "train":
118        image_paths, gt_paths = image_paths[:350], gt_paths[:350]
119    elif split == "val":
120        image_paths, gt_paths = image_paths[350:400], gt_paths[350:400]
121    elif split == "test":
122        image_paths, gt_paths = image_paths[400:], gt_paths[400:]
123    else:
124        raise ValueError(f"'{split}' is not a valid split.")
125
126    assert len(image_paths) == len(gt_paths) and len(image_paths) > 0
127
128    return image_paths, gt_paths

Get paths to the Papila dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • split: The choice of data split.
  • task: The choice of labels for specific task.
  • expert_choice: The choice of expert annotator.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_papila_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], task: Literal['cup', 'disc'] = 'disc', expert_choice: Literal['exp1', 'exp2'] = 'exp1', resize_inputs: bool = False, download: bool = False, **kwargs):
def get_papila_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    task: Literal["cup", "disc"] = "disc",
    expert_choice: Literal["exp1", "exp2"] = "exp1",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Get the Papila dataset for segmentation of optic cup and optic disc in fundus images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        task: The choice of labels for specific task.
        expert_choice: The choice of expert annotator.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_papila_paths(
        path=path, split=split, task=task, expert_choice=expert_choice, download=download
    )

    if resize_inputs:
        # The resize settings are built from the requested patch shape;
        # the images are flagged as RGB.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )

Get the Papila dataset for segmentation of optic cup and optic disc in fundus images.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • split: The choice of data split.
  • task: The choice of labels for specific task.
  • expert_choice: The choice of expert annotator.
  • resize_inputs: Whether to resize the inputs to the expected patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_papila_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], task: Literal['cup', 'disc'] = 'disc', expert_choice: Literal['exp1', 'exp2'] = 'exp1', resize_inputs: bool = False, download: bool = False, **kwargs):
def get_papila_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    task: Literal["cup", "disc"] = "disc",
    expert_choice: Literal["exp1", "exp2"] = "exp1",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Get the Papila dataloader for segmentation of optic cup and optic disc in fundus images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        task: The choice of labels for specific task.
        expert_choice: The choice of expert annotator.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining
    # ones, which are forwarded to the data loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_papila_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        task=task,
        expert_choice=expert_choice,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)

Get the Papila dataloader for segmentation of optic cup and optic disc in fundus images.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The choice of data split.
  • task: The choice of labels for specific task.
  • expert_choice: The choice of expert annotator.
  • resize_inputs: Whether to resize the inputs to the expected patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.