torch_em.data.datasets.medical.isic

The ISIC dataset contains annotations for lesion segmentation in dermoscopy images.

This dataset is located at https://challenge.isic-archive.com/data/#2018

The dataset is related to the following publication(s):

- https://doi.org/10.1038/sdata.2018.161
- https://doi.org/10.48550/arXiv.1710.05006
- https://doi.org/10.48550/arXiv.1902.03368

Please cite them if you use this dataset for your research.

View Source

  1"""The ISIC dataset contains annotations for lesion segmentation in dermoscopy images.
  2
  3This dataset is located at https://challenge.isic-archive.com/data/#2018
  4The dataset is related to the following publication(s):
  5- https://doi.org/10.1038/sdata.2018.161
  6- https://doi.org/10.48550/arXiv.1710.05006
  7- https://doi.org/10.48550/arXiv.1902.03368
  8
  9Please cite them if you use this dataset for your research.
 10"""
 11
 12import os
 13from glob import glob
 14from pathlib import Path
 15from natsort import natsorted
 16from typing import Union, Tuple, Literal, List
 17
 18from torch.utils.data import Dataset, DataLoader
 19
 20import torch_em
 21
 22from .. import util
 23from ..light_microscopy.neurips_cell_seg import to_rgb
 24
 25
 26URL = {
 27    "images": {
 28        "train": "https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1-2_Training_Input.zip",
 29        "val": "https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1-2_Validation_Input.zip",
 30        "test": "https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1-2_Test_Input.zip",
 31    },
 32    "gt": {
 33        "train": "https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1_Training_GroundTruth.zip",
 34        "val": "https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1_Validation_GroundTruth.zip",
 35        "test": "https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1_Test_GroundTruth.zip",
 36    },
 37}
 38
 39CHECKSUM = {
 40    "images": {
 41        "train": "80f98572347a2d7a376227fa9eb2e4f7459d317cb619865b8b9910c81446675f",
 42        "val": "0ea920fcfe512d12a6e620b50b50233c059f67b10146e1479c82be58ff15a797",
 43        "test": "e59ae1f69f4ed16f09db2cb1d76c2a828487b63d28f6ab85997f5616869b127d",
 44    },
 45    "gt": {
 46        "train": "99f8b2bb3c4d6af483362010715f7e7d5d122d9f6c02cac0e0d15bef77c7604c",
 47        "val": "f6911e9c0a64e6d687dd3ca466ca927dd5e82145cb2163b7a1e5b37d7a716285",
 48        "test": "2e8f6edce454a5bdee52485e39f92bd6eddf357e81f39018d05512175238ef82",
 49    }
 50}
 51
 52
 53def get_isic_data(
 54    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
 55) -> Tuple[str, str]:
 56    """Download the ISIC data.
 57
 58    Args:
 59        path: Filepath to a folder where the data is downloaded for further processing.
 60        split: The choice of data split.
 61        download: Whether to download the data if it is not present.
 62
 63    Returns:
 64        Filepath where the image data is downloaded.
 65        Filepath where the label data is downloaded.
 66    """
 67    assert split in list(URL["images"].keys()), f"{split} is not a valid split."
 68
 69    im_url = URL["images"][split]
 70    im_checksum = CHECKSUM["images"][split]
 71
 72    gt_url = URL["gt"][split]
 73    gt_checksum = CHECKSUM["gt"][split]
 74
 75    im_zipfile = os.path.split(im_url)[-1]
 76    gt_zipfile = os.path.split(gt_url)[-1]
 77
 78    imdir = os.path.join(path, Path(im_zipfile).stem)
 79    gtdir = os.path.join(path, Path(gt_zipfile).stem)
 80
 81    if os.path.exists(imdir) and os.path.exists(gtdir):
 82        return imdir, gtdir
 83
 84    os.makedirs(path, exist_ok=True)
 85
 86    im_zip_path = os.path.join(path, im_zipfile)
 87    gt_zip_path = os.path.join(path, gt_zipfile)
 88
 89    # download the images
 90    util.download_source(path=im_zip_path, url=im_url, download=download, checksum=im_checksum)
 91    util.unzip(zip_path=im_zip_path, dst=path, remove=False)
 92    # download the ground-truth
 93    util.download_source(path=gt_zip_path, url=gt_url, download=download, checksum=gt_checksum)
 94    util.unzip(zip_path=gt_zip_path, dst=path, remove=False)
 95
 96    return imdir, gtdir
 97
 98
 99def get_isic_paths(
100    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
101) -> Tuple[List[str], List[str]]:
102    """Get paths to the ISIC data.
103
104    Args:
105        path: Filepath to a folder where the data is downloaded for further processing.
106        split: The choice of data split.
107        download: Whether to download the data if it is not present.
108
109    Returns:
110        List of filepaths for the image data.
111        List of filepaths for the label data.
112    """
113    image_dir, gt_dir = get_isic_data(path=path, split=split, download=download)
114
115    image_paths = natsorted(glob(os.path.join(image_dir, "*.jpg")))
116    gt_paths = natsorted(glob(os.path.join(gt_dir, "*.png")))
117
118    return image_paths, gt_paths
119
120
def get_isic_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the ISIC dataset for skin lesion segmentation in dermoscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_isic_paths(path=path, split=split, download=download)

    if resize_inputs:
        # The helper hands back the updated dataset kwargs together with the patch shape to use.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
            ensure_rgb=to_rgb,
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset
163
164
def get_isic_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the ISIC dataloader for skin lesion segmentation in dermoscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_isic_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)

URL = {'images': {'train': 'https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1-2_Training_Input.zip', 'val': 'https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1-2_Validation_Input.zip', 'test': 'https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1-2_Test_Input.zip'}, 'gt': {'train': 'https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1_Training_GroundTruth.zip', 'val': 'https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1_Validation_GroundTruth.zip', 'test': 'https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task1_Test_GroundTruth.zip'}}

CHECKSUM = {'images': {'train': '80f98572347a2d7a376227fa9eb2e4f7459d317cb619865b8b9910c81446675f', 'val': '0ea920fcfe512d12a6e620b50b50233c059f67b10146e1479c82be58ff15a797', 'test': 'e59ae1f69f4ed16f09db2cb1d76c2a828487b63d28f6ab85997f5616869b127d'}, 'gt': {'train': '99f8b2bb3c4d6af483362010715f7e7d5d122d9f6c02cac0e0d15bef77c7604c', 'val': 'f6911e9c0a64e6d687dd3ca466ca927dd5e82145cb2163b7a1e5b37d7a716285', 'test': '2e8f6edce454a5bdee52485e39f92bd6eddf357e81f39018d05512175238ef82'}}

def get_isic_data( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[str, str]: View Source

def get_isic_data(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
) -> Tuple[str, str]:
    """Fetch and unpack the ISIC image and ground-truth archives for one split.

    Nothing is downloaded or extracted when both target folders already exist under `path`.

    Args:
        path: Folder in which the zip archives are stored and extracted.
        split: The data split to fetch, one of 'train', 'val' or 'test'.
        download: Forwarded to `util.download_source`; whether to download the data if it is not present.

    Returns:
        Folder for the image data (`path` joined with the image archive name without its extension).
        Folder for the label data (`path` joined with the ground-truth archive name without its extension).
    """
    valid_splits = list(URL["images"])
    assert split in valid_splits, f"{split} is not a valid split."

    # Gather (url, checksum, archive file name) for the images first, then for the ground-truth.
    sources = []
    for kind in ("images", "gt"):
        url = URL[kind][split]
        sources.append((url, CHECKSUM[kind][split], os.path.basename(url)))

    # Each archive is expected to unpack into a folder named after the zip file.
    image_folder, label_folder = (os.path.join(path, Path(name).stem) for _, _, name in sources)

    already_extracted = os.path.exists(image_folder) and os.path.exists(label_folder)
    if not already_extracted:
        os.makedirs(path, exist_ok=True)

        # Download and unpack one archive after the other; the zip files are kept on disk.
        for url, checksum, name in sources:
            archive = os.path.join(path, name)
            util.download_source(path=archive, url=url, download=download, checksum=checksum)
            util.unzip(zip_path=archive, dst=path, remove=False)

    return image_folder, label_folder

Download the ISIC data.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
split: The choice of data split.
download: Whether to download the data if it is not present.

Returns:

Filepath where the image data is downloaded.
Filepath where the label data is downloaded.

def get_isic_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]: View Source

def get_isic_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Collect the image and label filepaths of one ISIC split.

    The data is fetched through `get_isic_data` first, so it is downloaded on demand.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        download: Whether to download the data if it is not present.

    Returns:
        Naturally sorted list of the `*.jpg` files in the image folder.
        Naturally sorted list of the `*.png` files in the label folder.
    """
    image_folder, label_folder = get_isic_data(path=path, split=split, download=download)

    image_pattern = os.path.join(image_folder, "*.jpg")
    label_pattern = os.path.join(label_folder, "*.png")

    return natsorted(glob(image_pattern)), natsorted(glob(label_pattern))

Get paths to the ISIC data.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
split: The choice of data split.
download: Whether to download the data if it is not present.

Returns:

List of filepaths for the image data.
List of filepaths for the label data.

def get_isic_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

def get_isic_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the ISIC dataset for skin lesion segmentation in dermoscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_isic_paths(path=path, split=split, download=download)

    if resize_inputs:
        # The helper hands back the updated dataset kwargs together with the patch shape to use.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
            ensure_rgb=to_rgb,
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset

Get the ISIC dataset for skin lesion segmentation in dermoscopy images.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
patch_shape: The patch shape to use for training.
split: The choice of data split.
resize_inputs: Whether to resize the inputs to the expected patch shape.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_isic_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_isic_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the ISIC dataloader for skin lesion segmentation in dermoscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_isic_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)

Get the ISIC dataloader for skin lesion segmentation in dermoscopy images.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
batch_size: The batch size for training.
patch_shape: The patch shape to use for training.
split: The choice of data split.
resize_inputs: Whether to resize the inputs to the expected patch shape.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.