torch_em.data.datasets.medical.idrid
The IDRID dataset contains annotations for retinal lesions and optic disc segmentation in Fundus images.
The database is located at https://ieee-dataport.org/open-access/indian-diabetic-retinopathy-image-dataset-idrid
The dataloader makes use of an open-source version of the original dataset hosted on Kaggle.
The dataset is from the IDRiD challenge:
- https://idrid.grand-challenge.org/
- Porwal et al. - https://doi.org/10.1016/j.media.2019.101561

Please cite them if you use this dataset for your research.
"""The IDRID dataset contains annotations for retinal lesions and optic disc segmentation
in Fundus images.

The database is located at https://ieee-dataport.org/open-access/indian-diabetic-retinopathy-image-dataset-idrid
The dataloader makes use of an open-source version of the original dataset hosted on Kaggle.

The dataset is from the IDRiD challenge:
- https://idrid.grand-challenge.org/
- Porwal et al. - https://doi.org/10.1016/j.media.2019.101561
Please cite them if you use this dataset for your research.
"""

import os
from glob import glob
from pathlib import Path
from typing import Union, Tuple, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


# Maps each task name to the name of the sub-folder that holds its ground-truth masks.
TASKS = {
    "microaneurysms": r"1. Microaneurysms",
    "haemorrhages": r"2. Haemorrhages",
    "hard_exudates": r"3. Hard Exudates",
    "soft_exudates": r"4. Soft Exudates",
    "optic_disc": r"5. Optic Disc"
}


def get_idrid_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the IDRID dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    extracted_root = os.path.join(path, "data")
    data_dir = os.path.join(extracted_root, "A.%20Segmentation")

    # Only download and unpack when the extracted folder is not there yet.
    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)
        util.download_source_kaggle(
            path=path,
            dataset_name="aaryapatel98/indian-diabetic-retinopathy-image-dataset",
            download=download,
        )
        archive = os.path.join(path, "indian-diabetic-retinopathy-image-dataset.zip")
        util.unzip(zip_path=archive, dst=extracted_root)

    return data_dir


def get_idrid_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'test'],
    task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'],
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the IDRID data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        task: The choice of labels for the specific task.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` or `task` is not one of the supported choices.
    """
    # Validate with explicit exceptions instead of `assert`: asserts are stripped under `python -O`,
    # and an unknown split would then silently fall through to the test set below.
    # The checks run before `get_idrid_data`, so invalid arguments never trigger a download.
    if split not in ("train", "test"):
        raise ValueError(f"'{split}' is not a valid split choice. Please choose from 'train' / 'test'.")
    if task not in TASKS:
        raise ValueError(f"'{task}' is not a valid task choice. Please choose from {list(TASKS)}.")

    data_dir = get_idrid_data(path=path, download=download)

    split_dir = r"a. Training Set" if split == "train" else r"b. Testing Set"
    gt_paths = sorted(
        glob(
            os.path.join(
                data_dir, r"A. Segmentation", r"2. All Segmentation Groundtruths", split_dir, TASKS[task], "*.tif"
            )
        )
    )

    # Each image name is the label file stem without its last three characters
    # (presumably a task suffix such as "_OD" - verify against the extracted data).
    image_dir = os.path.join(data_dir, r"A. Segmentation", r"1. Original Images", split_dir)
    image_paths = [os.path.join(image_dir, f"{Path(p).stem[:-3]}.jpg") for p in gt_paths]

    return image_paths, gt_paths


def get_idrid_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'] = 'optic_disc',
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the IDRID dataset for segmentation of retinal lesions and optic disc in fundus images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        task: The choice of labels for the specific task.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_idrid_paths(path=path, split=split, task=task, download=download)

    if resize_inputs:
        # The fundus images are RGB, which is passed on to the resize transformation.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset


def get_idrid_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'] = 'optic_disc',
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the IDRID dataloader for segmentation of retinal lesions and optic disc in fundus images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        task: The choice of labels for the specific task.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the ones for the data loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_idrid_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        task=task,
        resize_inputs=resize_inputs,
        download=download,
        **ds_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
# Maps each task name to the name of the sub-folder that holds its ground-truth masks.
TASKS = {
    "microaneurysms": "1. Microaneurysms",
    "haemorrhages": "2. Haemorrhages",
    "hard_exudates": "3. Hard Exudates",
    "soft_exudates": "4. Soft Exudates",
    "optic_disc": "5. Optic Disc",
}
def
get_idrid_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_idrid_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the IDRID dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    extracted_root = os.path.join(path, "data")
    data_dir = os.path.join(extracted_root, "A.%20Segmentation")

    # Only download and unpack when the extracted folder is not there yet.
    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)
        util.download_source_kaggle(
            path=path,
            dataset_name="aaryapatel98/indian-diabetic-retinopathy-image-dataset",
            download=download,
        )
        archive = os.path.join(path, "indian-diabetic-retinopathy-image-dataset.zip")
        util.unzip(zip_path=archive, dst=extracted_root)

    return data_dir
Download the IDRID dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def
get_idrid_paths( path: Union[os.PathLike, str], split: Literal['train', 'test'], task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'], download: bool = False) -> Tuple[List[str], List[str]]:
def get_idrid_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'test'],
    task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'],
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the IDRID data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        task: The choice of labels for the specific task.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` or `task` is not one of the supported choices.
    """
    # Validate with explicit exceptions instead of `assert`: asserts are stripped under `python -O`,
    # and an unknown split would then silently fall through to the test set below.
    # The checks run before `get_idrid_data`, so invalid arguments never trigger a download.
    if split not in ("train", "test"):
        raise ValueError(f"'{split}' is not a valid split choice. Please choose from 'train' / 'test'.")
    if task not in TASKS:
        raise ValueError(f"'{task}' is not a valid task choice. Please choose from {list(TASKS)}.")

    data_dir = get_idrid_data(path=path, download=download)

    split_dir = r"a. Training Set" if split == "train" else r"b. Testing Set"
    gt_paths = sorted(
        glob(
            os.path.join(
                data_dir, r"A. Segmentation", r"2. All Segmentation Groundtruths", split_dir, TASKS[task], "*.tif"
            )
        )
    )

    # Each image name is the label file stem without its last three characters
    # (presumably a task suffix such as "_OD" - verify against the extracted data).
    image_dir = os.path.join(data_dir, r"A. Segmentation", r"1. Original Images", split_dir)
    image_paths = [os.path.join(image_dir, f"{Path(p).stem[:-3]}.jpg") for p in gt_paths]

    return image_paths, gt_paths
Get paths to the IDRID data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The choice of data split.
- task: The choice of labels for the specific task.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_idrid_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'test'], task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'] = 'optic_disc', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_idrid_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'] = 'optic_disc',
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the IDRID dataset for segmentation of retinal lesions and optic disc in fundus images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        task: The choice of labels for the specific task.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_idrid_paths(path=path, split=split, task=task, download=download)

    if resize_inputs:
        # The fundus images are RGB, which is passed on to the resize transformation.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset
Get the IDRID dataset for segmentation of retinal lesions and optic disc in fundus images.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- task: The choice of labels for the specific task.
- resize_inputs: Whether to resize the inputs to the expected patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_idrid_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'test'], task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'] = 'optic_disc', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_idrid_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    task: Literal['microaneurysms', 'haemorrhages', 'hard_exudates', 'soft_exudates', 'optic_disc'] = 'optic_disc',
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the IDRID dataloader for segmentation of retinal lesions and optic disc in fundus images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        task: The choice of labels for the specific task.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the ones for the data loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_idrid_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        task=task,
        resize_inputs=resize_inputs,
        download=download,
        **ds_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
Get the IDRID dataloader for segmentation of retinal lesions and optic disc in fundus images.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- task: The choice of labels for the specific task.
- resize_inputs: Whether to resize the inputs to the expected patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.