torch_em.data.datasets.medical.idrid

The IDRID dataset contains annotations for retinal lesions and optic disc segmentation in Fundus images.

The database is located at https://ieee-dataport.org/open-access/indian-diabetic-retinopathy-image-dataset-idrid. The dataloader makes use of an open-source version of the original dataset hosted on Kaggle.

The dataset is from the IDRiD challenge:
- https://idrid.grand-challenge.org/
- Porwal et al. - https://doi.org/10.1016/j.media.2019.101561

Please cite them if you use this dataset for your research.

  1"""The IDRID dataset contains annotations for retinal lesions and optic disc segmentation
  2in Fundus images.
  3
  4The database is located at https://ieee-dataport.org/open-access/indian-diabetic-retinopathy-image-dataset-idrid
  5The dataloader makes use of an open-source version of the original dataset hosted on Kaggle.
  6
  7The dataset is from the IDRiD challenge:
  8- https://idrid.grand-challenge.org/
  9- Porwal et al. - https://doi.org/10.1016/j.media.2019.101561
 10Please cite them if you use this dataset for your research.
 11"""
 12
 13import os
 14from glob import glob
 15from pathlib import Path
 16from typing import Union, Tuple, Literal, List
 17
 18from torch.utils.data import Dataset, DataLoader
 19
 20import torch_em
 21
 22from .. import util
 23
 24
 25TASKS = {
 26    "microaneurysms": r"1. Microaneurysms",
 27    "haemorrhages": r"2. Haemorrhages",
 28    "hard_exudates": r"3. Hard Exudates",
 29    "soft_exudates": r"4. Soft Exudates",
 30    "optic_disc": r"5. Optic Disc"
 31}
 32
 33
 34def get_idrid_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 35    """Download the IDRID dataset.
 36
 37    Args:
 38        path: Filepath to a folder where the data is downloaded for further processing.
 39        download: Whether to download the data if it is not present.
 40
 41    Returns:
 42        Filepath where the data is downloaded.
 43    """
 44    data_dir = os.path.join(path, "data", "A.%20Segmentation")
 45    if os.path.exists(data_dir):
 46        return data_dir
 47
 48    os.makedirs(path, exist_ok=True)
 49
 50    util.download_source_kaggle(
 51        path=path, dataset_name="aaryapatel98/indian-diabetic-retinopathy-image-dataset", download=download,
 52    )
 53    zip_path = os.path.join(path, "indian-diabetic-retinopathy-image-dataset.zip")
 54    util.unzip(zip_path=zip_path, dst=os.path.join(path, "data"))
 55
 56    return data_dir
 57
 58
 59def get_idrid_paths(
 60    path: Union[os.PathLike, str],
 61    split: Literal['train', 'test'],
 62    task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'],
 63    download: bool = False
 64) -> Tuple[List[str], List[str]]:
 65    """Get paths to the IDRID data.
 66
 67    Args:
 68        path: Filepath to a folder where the data is downloaded for further processing.
 69        split: The choice of data split.
 70        task: The choice of labels for the specific task.
 71        download: Whether to download the data if it is not present.
 72
 73    Returns:
 74        List of filepaths for the image data.
 75        List of filepaths for the label data.
 76    """
 77    data_dir = get_idrid_data(path=path, download=download)
 78
 79    assert split in ["train", "test"]
 80    assert task in list(TASKS.keys())
 81
 82    split = r"a. Training Set" if split == "train" else r"b. Testing Set"
 83    gt_paths = sorted(
 84        glob(
 85            os.path.join(data_dir, r"A. Segmentation", r"2. All Segmentation Groundtruths", split, TASKS[task], "*.tif")
 86        )
 87    )
 88
 89    image_dir = os.path.join(data_dir, r"A. Segmentation", r"1. Original Images", split)
 90    image_paths = [os.path.join(image_dir, f"{Path(p).stem[:-3]}.jpg") for p in gt_paths]
 91
 92    return image_paths, gt_paths
 93
 94
 95def get_idrid_dataset(
 96    path: Union[os.PathLike, str],
 97    patch_shape: Tuple[int, int],
 98    split: Literal['train', 'test'],
 99    task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'] = 'optic_disc',
100    resize_inputs: bool = False,
101    download: bool = False,
102    **kwargs
103) -> Dataset:
104    """Get the IDRID dataset for segmentation of retinal lesions and optic disc in fundus images.
105
106    Args:
107        path: Filepath to a folder where the data is downloaded for further processing.
108        patch_shape: The patch shape to use for training.
109        split: The choice of data split.
110        task: The choice of labels for the specific task.
111        resize_inputs: Whether to resize the inputs to the expected patch shape.
112        download: Whether to download the data if it is not present.
113        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
114
115    Returns:
116        The segmentation dataset.
117    """
118    image_paths, gt_paths = get_idrid_paths(path, split, task, download)
119
120    if resize_inputs:
121        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
122        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
123            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
124        )
125
126    return torch_em.default_segmentation_dataset(
127        raw_paths=image_paths,
128        raw_key=None,
129        label_paths=gt_paths,
130        label_key=None,
131        patch_shape=patch_shape,
132        is_seg_dataset=False,
133        **kwargs
134    )
135
136
137def get_idrid_loader(
138    path: Union[os.PathLike, str],
139    batch_size: int,
140    patch_shape: Tuple[int, int],
141    split: Literal['train', 'test'],
142    task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'] = 'optic_disc',
143    resize_inputs: bool = False,
144    download: bool = False,
145    **kwargs
146) -> DataLoader:
147    """Get the IDRID dataloader for segmentation of retinal lesions and optic disc in fundus images.
148
149    Args:
150        path: Filepath to a folder where the data is downloaded for further processing.
151        batch_size: The batch size for training.
152        patch_shape: The patch shape to use for training.
153        split: The choice of data split.
154        task: The choice of labels for the specific task.
155        resize_inputs: Whether to resize the inputs to the expected patch shape.
156        download: Whether to download the data if it is not present.
157        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
158
159    Returns:
160        The DataLoader.
161    """
162    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
163    dataset = get_idrid_dataset(path, patch_shape, split, task, resize_inputs, download, **ds_kwargs)
164    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
TASKS = {'microaneurysms': '1. Microaneurysms', 'haemorrhages': '2. Haemorrhages', 'hard_exudates': '3. Hard Exudates', 'soft_exudates': '4. Soft Exudates', 'optic_disc': '5. Optic Disc'}
def get_idrid_data(path: Union[os.PathLike, str], download: bool = False) -> str:

Download the IDRID dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:
  • Filepath where the data is downloaded.
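
A minimal usage sketch, assuming a configured Kaggle API token (the data is fetched from Kaggle) and using "./idrid" as an arbitrary example folder:

from torch_em.data.datasets.medical import idrid

# Download the segmentation data (or reuse an existing download) below "./idrid".
data_dir = idrid.get_idrid_data(path="./idrid", download=True)
print(data_dir)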

def get_idrid_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'test'],
    task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'],
    download: bool = False
) -> Tuple[List[str], List[str]]:

Get paths to the IDRID data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • split: The choice of data split.
  • task: The choice of labels for the specific task.
  • download: Whether to download the data if it is not present.
Returns:
  • List of filepaths for the image data.
  • List of filepaths for the label data.
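
A minimal sketch of collecting the image/label file pairs, using example values for the split and task ("./idrid" is, as above, an arbitrary folder and downloading requires Kaggle credentials):

from torch_em.data.datasets.medical.idrid import get_idrid_paths

# Paths for the optic disc task of the training split; the data is downloaded if necessary.
image_paths, gt_paths = get_idrid_paths(path="./idrid", split="train", task="optic_disc", download=True)
for image_path, gt_path in zip(image_paths[:3], gt_paths[:3]):
    print(image_path, "->", gt_path)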

def get_idrid_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'] = 'optic_disc',
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> torch.utils.data.dataset.Dataset:

Get the IDRID dataset for segmentation of retinal lesions and optic disc in fundus images.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • split: The choice of data split.
  • task: The choice of labels for the specific task.
  • resize_inputs: Whether to resize the inputs to the expected patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:
  • The segmentation dataset.
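
A minimal sketch of building the dataset; the patch shape, task, and folder below are example choices, not required values:

from torch_em.data.datasets.medical.idrid import get_idrid_dataset

# Patch-based dataset for hard exudate segmentation on the training split,
# with the inputs resized to the patch shape.
dataset = get_idrid_dataset(
    path="./idrid",
    patch_shape=(512, 512),
    split="train",
    task="hard_exudates",
    resize_inputs=True,
    download=True,
)
print(len(dataset))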

def get_idrid_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'] = 'optic_disc',
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> torch.utils.data.dataloader.DataLoader:

Get the IDRID dataloader for segmentation of retinal lesions and optic disc in fundus images.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The choice of data split.
  • task: The choice of labels for the specific task.
  • resize_inputs: Whether to resize the inputs to the expected patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
  • The DataLoader.
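
A minimal sketch of building a training loader; the batch size, patch shape, and folder are example values, and the loader is assumed to yield (image, label) batches via the default torch_em collation:

from torch_em.data.datasets.medical.idrid import get_idrid_loader

# Training loader for optic disc segmentation.
loader = get_idrid_loader(
    path="./idrid",
    batch_size=2,
    patch_shape=(512, 512),
    split="train",
    task="optic_disc",
    resize_inputs=True,
    download=True,
)

# Inspect one batch of images and labels.
x, y = next(iter(loader))
print(x.shape, y.shape)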