torch_em.data.datasets.medical.kvasir
The KVASIR dataset contains annotations for polyp segmentation in colonoscopy images.
The dataset is located at: https://datasets.simula.no/kvasir-seg/. This dataset is from the publication https://doi.org/10.1007/978-3-030-37734-2_37. Please cite it if you use this dataset for your research.
"""The KVASIR dataset contains annotations for polyp segmentation
in colonoscopy images.

The dataset is located at: https://datasets.simula.no/kvasir-seg/.
This dataset is from the publication https://doi.org/10.1007/978-3-030-37734-2_37.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from tqdm import tqdm
from pathlib import Path
from typing import Union, Tuple, List

import numpy as np
import imageio.v3 as imageio

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://datasets.simula.no/downloads/kvasir-seg.zip"
CHECKSUM = "03b30e21d584e04facf49397a2576738fd626815771afbbf788f74a7153478f7"


def get_kvasir_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the KVASIR dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    data_dir = os.path.join(path, "Kvasir-SEG")
    # Nothing to do if the "Kvasir-SEG" folder is already there.
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "kvasir-seg.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def get_kvasir_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the KVASIR data.

    The JPEG masks are converted once into binary uint8 TIFF files that are stored
    in "masks/preprocessed"; masks that were already converted are not rewritten.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_kvasir_data(path=path, download=download)

    image_paths = sorted(glob(os.path.join(data_dir, "images", "*.jpg")))
    gt_paths = sorted(glob(os.path.join(data_dir, "masks", "*.jpg")))

    neu_gt_dir = os.path.join(data_dir, "masks", "preprocessed")
    os.makedirs(neu_gt_dir, exist_ok=True)

    neu_gt_paths = []
    for gt_path in tqdm(gt_paths):
        neu_gt_path = os.path.join(neu_gt_dir, f"{Path(gt_path).stem}.tif")
        neu_gt_paths.append(neu_gt_path)
        if os.path.exists(neu_gt_path):
            continue

        gt = imageio.imread(gt_path)
        # Average over the channel axis only when there is one. A mask that is read
        # as a 2D array would otherwise lose its width axis and be saved as 1D.
        if gt.ndim == 3:
            gt = np.mean(gt, axis=-1)
        # NOTE(review): the cutoff of 240 presumably absorbs JPEG artifacts around
        # the white foreground - confirm against the data.
        gt = (gt >= 240).astype("uint8")
        imageio.imwrite(neu_gt_path, gt, compression="zlib")

    return image_paths, neu_gt_paths


def get_kvasir_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the KVASIR dataset for polyp segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_kvasir_paths(path, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )


def get_kvasir_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the KVASIR dataloader for polyp segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_kvasir_dataset(path, patch_shape, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
URL =
'https://datasets.simula.no/downloads/kvasir-seg.zip'
CHECKSUM =
'03b30e21d584e04facf49397a2576738fd626815771afbbf788f74a7153478f7'
def
get_kvasir_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_kvasir_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the KVASIR data is available and return its location.

    Args:
        path: Filepath to the folder where the archive is stored and unzipped.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the "Kvasir-SEG" folder inside `path`.
    """
    data_dir = os.path.join(path, "Kvasir-SEG")

    # Fetch and unpack the archive only when the data folder is missing.
    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "kvasir-seg.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return data_dir
Download the KVASIR dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def
get_kvasir_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
def get_kvasir_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the KVASIR data.

    The JPEG masks are converted once into binary uint8 TIFF files that are stored
    in "masks/preprocessed"; masks that were already converted are not rewritten.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_kvasir_data(path=path, download=download)

    image_paths = sorted(glob(os.path.join(data_dir, "images", "*.jpg")))
    gt_paths = sorted(glob(os.path.join(data_dir, "masks", "*.jpg")))

    neu_gt_dir = os.path.join(data_dir, "masks", "preprocessed")
    os.makedirs(neu_gt_dir, exist_ok=True)

    neu_gt_paths = []
    for gt_path in tqdm(gt_paths):
        neu_gt_path = os.path.join(neu_gt_dir, f"{Path(gt_path).stem}.tif")
        neu_gt_paths.append(neu_gt_path)
        if os.path.exists(neu_gt_path):
            continue

        gt = imageio.imread(gt_path)
        # Average over the channel axis only when there is one. A mask that is read
        # as a 2D array would otherwise lose its width axis and be saved as 1D.
        if gt.ndim == 3:
            gt = np.mean(gt, axis=-1)
        # NOTE(review): the cutoff of 240 presumably absorbs JPEG artifacts around
        # the white foreground - confirm against the data.
        gt = (gt >= 240).astype("uint8")
        imageio.imwrite(neu_gt_path, gt, compression="zlib")

    return image_paths, neu_gt_paths
Get paths to the KVASIR data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_kvasir_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_kvasir_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the KVASIR polyp segmentation dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_kvasir_paths(path, download)

    if resize_inputs:
        # The helper hands back both the updated kwargs and the patch shape to use.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    fixed_kwargs = dict(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
    )
    return torch_em.default_segmentation_dataset(**fixed_kwargs, **kwargs)
Get the KVASIR dataset for polyp segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- resize_inputs: Whether to resize the inputs to the patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_kvasir_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_kvasir_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the KVASIR dataloader for polyp segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    kvasir_dataset = get_kvasir_dataset(
        path=path,
        patch_shape=patch_shape,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(dataset=kvasir_dataset, batch_size=batch_size, **dataloader_kwargs)
Get the KVASIR dataloader for polyp segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- resize_inputs: Whether to resize the inputs to the patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.