torch_em.data.datasets.medical.isic
The ISIC dataset contains annotations for lesion segmentation in dermoscopy images.
This dataset is located at https://challenge.isic-archive.com/data/#2018
The dataset is related to the following publication(s):
- https://doi.org/10.1038/sdata.2018.161
- https://doi.org/10.48550/arXiv.1710.05006
- https://doi.org/10.48550/arXiv.1902.03368
Please cite them if you use this dataset for your research.
"""The ISIC dataset contains annotations for lesion segmentation in dermoscopy images.

This dataset is located at https://challenge.isic-archive.com/data/#2018
The dataset is related to the following publication(s):
- https://doi.org/10.1038/sdata.2018.161
- https://doi.org/10.48550/arXiv.1710.05006
- https://doi.org/10.48550/arXiv.1902.03368

Please cite them if you use this dataset for your research.
"""

import os
from glob import glob
from pathlib import Path
from natsort import natsorted
from typing import Union, Tuple, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util
from ..light_microscopy.neurips_cell_seg import to_rgb


# Download locations of the zip archives, keyed by data kind ("images" / "gt") and then by split.
URL = {
    "images": {
        "train": "https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1-2_Training_Input.zip",
        "val": "https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1-2_Validation_Input.zip",
        "test": "https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1-2_Test_Input.zip",
    },
    "gt": {
        "train": "https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1_Training_GroundTruth.zip",
        "val": "https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1_Validation_GroundTruth.zip",
        "test": "https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1_Test_GroundTruth.zip",
    },
}

# Expected checksums of the archives above, using the same two-level keys as `URL`.
CHECKSUM = {
    "images": {
        "train": "80f98572347a2d7a376227fa9eb2e4f7459d317cb619865b8b9910c81446675f",
        "val": "0ea920fcfe512d12a6e620b50b50233c059f67b10146e1479c82be58ff15a797",
        "test": "e59ae1f69f4ed16f09db2cb1d76c2a828487b63d28f6ab85997f5616869b127d",
    },
    "gt": {
        "train": "99f8b2bb3c4d6af483362010715f7e7d5d122d9f6c02cac0e0d15bef77c7604c",
        "val": "f6911e9c0a64e6d687dd3ca466ca927dd5e82145cb2163b7a1e5b37d7a716285",
        "test": "2e8f6edce454a5bdee52485e39f92bd6eddf357e81f39018d05512175238ef82",
    }
}


def get_isic_data(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
) -> Tuple[str, str]:
    """Download the ISIC data.

    The image archive and the ground-truth archive are handled independently:
    an archive whose extracted folder already exists under `path` is neither
    fetched nor unzipped again, even if the other folder is still missing.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the image data is downloaded.
        Filepath where the label data is downloaded.
    """
    valid_splits = list(URL["images"].keys())
    assert split in valid_splits, f"{split} is not a valid split."

    extracted_dirs = []
    # Images first, then ground truth: the same order in which the archives were always processed.
    for kind in ("images", "gt"):
        url = URL[kind][split]
        zip_name = os.path.basename(url)

        # The extracted folder is expected to carry the archive's name without the ".zip" suffix.
        target_dir = os.path.join(path, Path(zip_name).stem)
        extracted_dirs.append(target_dir)
        if os.path.exists(target_dir):
            continue

        os.makedirs(path, exist_ok=True)
        zip_path = os.path.join(path, zip_name)
        # The expected checksum is forwarded to the download helper; the zip file is kept after extraction.
        util.download_source(path=zip_path, url=url, download=download, checksum=CHECKSUM[kind][split])
        util.unzip(zip_path=zip_path, dst=path, remove=False)

    image_dir, gt_dir = extracted_dirs
    return image_dir, gt_dir


def get_isic_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the ISIC data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    image_dir, gt_dir = get_isic_data(path=path, split=split, download=download)

    # Images are matched as "*.jpg" and labels as "*.png"; each list is naturally sorted on its own.
    image_pattern = os.path.join(image_dir, "*.jpg")
    image_paths = natsorted(glob(image_pattern))

    gt_pattern = os.path.join(gt_dir, "*.png")
    gt_paths = natsorted(glob(gt_pattern))

    return image_paths, gt_paths


def get_isic_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the ISIC dataset for skin lesion segmentation in dermoscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_isic_paths(path=path, split=split, download=download)

    if resize_inputs:
        # The helper returns updated dataset kwargs and an updated patch shape;
        # it is configured for RGB inputs, with `to_rgb` as the RGB conversion.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
            ensure_rgb=to_rgb,
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset


def get_isic_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the ISIC dataloader for skin lesion segmentation in dermoscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    isic_dataset = get_isic_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(isic_dataset, batch_size, **dataloader_kwargs)
def get_isic_data(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
) -> Tuple[str, str]:
    """Download the ISIC data.

    The image archive and the ground-truth archive are handled independently:
    an archive whose extracted folder already exists under `path` is neither
    fetched nor unzipped again, even if the other folder is still missing.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the image data is downloaded.
        Filepath where the label data is downloaded.
    """
    valid_splits = list(URL["images"].keys())
    assert split in valid_splits, f"{split} is not a valid split."

    extracted_dirs = []
    # Images first, then ground truth: the same order in which the archives were always processed.
    for kind in ("images", "gt"):
        url = URL[kind][split]
        zip_name = os.path.basename(url)

        # The extracted folder is expected to carry the archive's name without the ".zip" suffix.
        target_dir = os.path.join(path, Path(zip_name).stem)
        extracted_dirs.append(target_dir)
        if os.path.exists(target_dir):
            continue

        os.makedirs(path, exist_ok=True)
        zip_path = os.path.join(path, zip_name)
        # The expected checksum is forwarded to the download helper; the zip file is kept after extraction.
        util.download_source(path=zip_path, url=url, download=download, checksum=CHECKSUM[kind][split])
        util.unzip(zip_path=zip_path, dst=path, remove=False)

    image_dir, gt_dir = extracted_dirs
    return image_dir, gt_dir
Download the ISIC data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The choice of data split.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the image data is downloaded. Filepath where the label data is downloaded.
def get_isic_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the ISIC data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    image_dir, gt_dir = get_isic_data(path=path, split=split, download=download)

    # Images are matched as "*.jpg" and labels as "*.png"; each list is naturally sorted on its own.
    image_pattern = os.path.join(image_dir, "*.jpg")
    image_paths = natsorted(glob(image_pattern))

    gt_pattern = os.path.join(gt_dir, "*.png")
    gt_paths = natsorted(glob(gt_pattern))

    return image_paths, gt_paths
Get paths to the ISIC data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The choice of data split.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_isic_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the ISIC dataset for skin lesion segmentation in dermoscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_isic_paths(path=path, split=split, download=download)

    if resize_inputs:
        # The helper returns updated dataset kwargs and an updated patch shape;
        # it is configured for RGB inputs, with `to_rgb` as the RGB conversion.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
            ensure_rgb=to_rgb,
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset
Get the ISIC dataset for skin lesion segmentation in dermoscopy images.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- resize_inputs: Whether to resize the inputs to the expected patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_isic_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the ISIC dataloader for skin lesion segmentation in dermoscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    isic_dataset = get_isic_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(isic_dataset, batch_size, **dataloader_kwargs)
Get the ISIC dataloader for skin lesion segmentation in dermoscopy images.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- resize_inputs: Whether to resize the inputs to the expected patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.