torch_em.data.datasets.medical.piccolo
The PICCOLO dataset contains annotations for polyp segmentation in narrow band imaging colonoscopy.
NOTE: Automatic download is not supported with this dataset. See 'get_piccolo_data' for details.
The dataset is from the publication https://doi.org/10.3390/app10238501. Please cite it if you use this dataset for your research.
"""The PICCOLO dataset contains annotations for polyp segmentation
in narrow band imaging colonoscopy.

NOTE: Automatic download is not supported with this dataset. See 'get_piccolo_data' for details.

The dataset is from the publication https://doi.org/10.3390/app10238501.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


def get_piccolo_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Get the PICCOLO dataset.

    The database is located at:
    - https://www.biobancovasco.bioef.eus/en/Sample-and-data-e-catalog/Databases/PD178-PICCOLO-EN1.html

    Follow the instructions below to get access to the dataset.
    - Visit the attached website above
    - Fill up the access request form: https://labur.eus/EzJUN
    - Send an email to Basque Biobank at solicitudes.biobancovasco@bioef.eus, requesting access to the dataset.
    - The team will request you to follow-up with some formalities.
    - Then, you will gain access to the ".rar" file.
    - Finally, provide the path where the rar file is stored, and you should be good to go.

    Args:
        path: Filepath to the folder that contains the manually obtained rar file (or the already extracted data).
        download: Automatic download is not supported. Passing True raises an error
            unless the extracted data folder is already present.

    Returns:
        Filepath to the extracted data folder ('piccolo dataset-release0.1' inside `path`).

    Raises:
        NotImplementedError: If `download` is True and the extracted data folder does not exist.
        FileNotFoundError: If neither the extracted data folder nor the rar file exists in `path`.
    """
    data_dir = os.path.join(path, r"piccolo dataset-release0.1")
    if os.path.exists(data_dir):
        # Already extracted: nothing left to do, regardless of `download`.
        return data_dir

    if download:
        raise NotImplementedError(
            "Automatic download is not possible for this dataset. See 'get_piccolo_data' for details."
        )

    rar_file = os.path.join(path, r"piccolo dataset_widefield-release0.1.rar")
    if not os.path.exists(rar_file):
        raise FileNotFoundError(
            "You must download the PICCOLO dataset from the Basque Biobank, see 'get_piccolo_data' for details."
        )

    # NOTE(review): 'remove=False' presumably keeps the archive after extraction - confirm in 'util.unzip_rarfile'.
    util.unzip_rarfile(rar_path=rar_file, dst=path, remove=False)
    return data_dir


def get_piccolo_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'validation', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the PICCOLO data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split. Either 'train', 'validation' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of the supported choices.
    """
    # Reject unknown splits up front: otherwise the globs below match nothing and
    # two empty lists are returned silently. Checking first also avoids unpacking
    # the archive for a request that cannot succeed.
    valid_splits = ("train", "validation", "test")
    if split not in valid_splits:
        raise ValueError(f"'{split}' is not a valid split. Please choose one of {valid_splits}.")

    data_dir = get_piccolo_data(path, download)

    # Natural sorting of both listings; images are in 'polyps', labels in 'masks'.
    image_paths = natsorted(glob(os.path.join(data_dir, split, "polyps", "*")))
    gt_paths = natsorted(glob(os.path.join(data_dir, split, "masks", "*")))

    return image_paths, gt_paths


def get_piccolo_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "validation", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the PICCOLO dataset for polyp segmentation in narrow band imaging colonoscopy images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_piccolo_paths(path, split, download)

    if resize_inputs:
        # The inputs are flagged as RGB for the resize transform.
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )


def get_piccolo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "validation", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the PICCOLO dataloader for polyp segmentation in narrow band imaging colonoscopy images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones,
    # which are forwarded to the data loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_piccolo_dataset(path, patch_shape, split, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def get_piccolo_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Locate the PICCOLO data, extracting the manually obtained archive if needed.

    The database is located at:
    - https://www.biobancovasco.bioef.eus/en/Sample-and-data-e-catalog/Databases/PD178-PICCOLO-EN1.html

    The data cannot be downloaded automatically. To get access:
    - Visit the website above.
    - Fill out the access request form: https://labur.eus/EzJUN
    - Send an email to Basque Biobank at solicitudes.biobancovasco@bioef.eus, requesting access to the dataset.
    - Complete the formalities the team asks for.
    - You will then receive access to the ".rar" file.
    - Pass the folder that contains the rar file as `path`.

    Args:
        path: Filepath to the folder that holds the rar file (or the already extracted data).
        download: Automatic download is unsupported; True raises an error when the data is not extracted yet.

    Returns:
        Filepath to the extracted data folder inside `path`.

    Raises:
        NotImplementedError: If `download` is True and the extracted folder is missing.
        FileNotFoundError: If both the extracted folder and the rar file are missing.
    """
    extracted_dir = os.path.join(path, r"piccolo dataset-release0.1")

    # Work is only needed while the extracted folder is absent.
    if not os.path.exists(extracted_dir):
        if download:
            raise NotImplementedError(
                "Automatic download is not possible for this dataset. See 'get_piccolo_data' for details."
            )

        archive = os.path.join(path, r"piccolo dataset_widefield-release0.1.rar")
        if os.path.exists(archive):
            # Unpack next to the archive; 'remove=False' is forwarded unchanged.
            util.unzip_rarfile(rar_path=archive, dst=path, remove=False)
        else:
            raise FileNotFoundError(
                "You must download the PICCOLO dataset from the Basque Biobank, see 'get_piccolo_data' for details."
            )

    return extracted_dir
Get the PICCOLO dataset.
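The database is located at: https://www.biobancovasco.bioef.eus/en/Sample-and-data-e-catalog/Databases/PD178-PICCOLO-EN1.html
Follow the instructions below to get access to the dataset.
- Visit the database website listed above.
- Fill out the access request form: https://labur.eus/EzJUN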
- Send an email to Basque Biobank at solicitudes.biobancovasco@bioef.eus, requesting access to the dataset.
- The team will request you to follow-up with some formalities.
- Then, you will gain access to the ".rar" file.
- Finally, provide the path where the rar file is stored, and you should be good to go.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def get_piccolo_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'validation', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the PICCOLO data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split. Either 'train', 'validation' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of the supported choices.
    """
    # Reject unknown splits up front: otherwise the globs below match nothing and
    # two empty lists are returned silently. Checking first also avoids unpacking
    # the archive for a request that cannot succeed.
    valid_splits = ("train", "validation", "test")
    if split not in valid_splits:
        raise ValueError(f"'{split}' is not a valid split. Please choose one of {valid_splits}.")

    data_dir = get_piccolo_data(path, download)

    # Natural sorting of both listings; images are in 'polyps', labels in 'masks'.
    image_paths = natsorted(glob(os.path.join(data_dir, split, "polyps", "*")))
    gt_paths = natsorted(glob(os.path.join(data_dir, split, "masks", "*")))

    return image_paths, gt_paths
Get paths to the PICCOLO data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The choice of data split.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_piccolo_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "validation", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the PICCOLO segmentation dataset (polyps in narrow band imaging colonoscopy images).

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_files, label_files = get_piccolo_paths(path, split, download)

    if resize_inputs:
        # The resize helper returns updated dataset kwargs and patch shape;
        # the inputs are flagged as RGB.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_files,
        raw_key=None,
        label_paths=label_files,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset
Get the PICCOLO dataset for polyp segmentation in narrow band imaging colonoscopy images.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_piccolo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "validation", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the PICCOLO dataloader (polyps in narrow band imaging colonoscopy images).

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Split the incoming kwargs: those for the dataset factory versus the rest,
    # which are passed on to the data loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    piccolo_dataset = get_piccolo_dataset(
        path, patch_shape, split, resize_inputs, download, **dataset_kwargs
    )

    loader = torch_em.get_data_loader(piccolo_dataset, batch_size, **dataloader_kwargs)
    return loader
Get the PICCOLO dataloader for polyp segmentation in narrow band imaging colonoscopy images.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.