torch_em.data.datasets.medical.piccolo

The PICCOLO dataset contains annotations for polyp segmentation in narrow band imaging colonoscopy.

NOTE: Automatic download is not supported with this dataset. See 'get_piccolo_data' for details.

The dataset is from the publication https://doi.org/10.3390/app10238501. Please cite it if you use this dataset for your research.

View Source

  1"""The PICCOLO dataset contains annotations for polyp segmentation
  2in narrow band imaging colonoscopy.
  3
  4NOTE: Automatic download is not supported with this dataset. See 'get_piccolo_data' for details.
  5
  6The dataset is from the publication https://doi.org/10.3390/app10238501.
  7Please cite it if you use this dataset for your research.
  8"""
  9
 10import os
 11from glob import glob
 12from natsort import natsorted
 13from typing import Union, Tuple, Literal, List
 14
 15from torch.utils.data import Dataset, DataLoader
 16
 17import torch_em
 18
 19from .. import util
 20
 21
 22def get_piccolo_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 23    """Get the PICCOLO dataset.
 24
 25    The database is located at:
 26    - https://www.biobancovasco.bioef.eus/en/Sample-and-data-e-catalog/Databases/PD178-PICCOLO-EN1.html
 27
 28    Follow the instructions below to get access to the dataset.
 29    - Visit the attached website above
 30    - Fill up the access request form: https://labur.eus/EzJUN
 31    - Send an email to Basque Biobank at solicitudes.biobancovasco@bioef.eus, requesting access to the dataset.
 32    - The team will request you to follow-up with some formalities.
 33    - Then, you will gain access to the ".rar" file.
 34    - Finally, provide the path where the rar file is stored, and you should be good to go.
 35
 36    Args:
 37        path: Filepath to a folder where the data is downloaded for further processing.
 38        download: Whether to download the data if it is not present.
 39
 40    Returns:
 41        Filepath where the data is downloaded.
 42    """
 43    data_dir = os.path.join(path, r"piccolo dataset-release0.1")
 44    if os.path.exists(data_dir):
 45        return data_dir
 46
 47    if download:
 48        raise NotImplementedError(
 49            "Automatic download is not possible for this dataset. See 'get_piccolo_data' for details."
 50        )
 51
 52    rar_file = os.path.join(path, r"piccolo dataset_widefield-release0.1.rar")
 53    if not os.path.exists(rar_file):
 54        raise FileNotFoundError(
 55            "You must download the PICCOLO dataset from the Basque Biobank, see 'get_piccolo_data' for details."
 56        )
 57
 58    util.unzip_rarfile(rar_path=rar_file, dst=path, remove=False)
 59    return data_dir
 60
 61
 62def get_piccolo_paths(
 63    path: Union[os.PathLike, str], split: Literal['train', 'validation', 'test'], download: bool = False
 64) -> Tuple[List[str], List[str]]:
 65    """Get paths to the PICCOLO data.
 66
 67    Args:
 68        path: Filepath to a folder where the data is downloaded for further processing.
 69        split: The choice of data split.
 70        download: Whether to download the data if it is not present.
 71
 72    Returns:
 73        List of filepaths for the image data.
 74        List of filepaths for the label data.
 75    """
 76    data_dir = get_piccolo_data(path, download)
 77
 78    image_paths = natsorted(glob(os.path.join(data_dir, split, "polyps", "*")))
 79    gt_paths = natsorted(glob(os.path.join(data_dir, split, "masks", "*")))
 80
 81    return image_paths, gt_paths
 82
 83
 84def get_piccolo_dataset(
 85    path: Union[os.PathLike, str],
 86    patch_shape: Tuple[int, int],
 87    split: Literal["train", "validation", "test"],
 88    resize_inputs: bool = False,
 89    download: bool = False,
 90    **kwargs
 91) -> Dataset:
 92    """Get the PICCOLO dataset for polyp segmentation in narrow band imaging colonoscopy images.
 93
 94    Args:
 95        path: Filepath to a folder where the data is downloaded for further processing.
 96        patch_shape: The patch shape to use for training.
 97        split: The choice of data split.
 98        resize_inputs: Whether to resize inputs to the desired patch shape.
 99        download: Whether to download the data if it is not present.
100        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
101
102    Returns:
103        The segmentation dataset.
104    """
105    image_paths, gt_paths = get_piccolo_paths(path, split, download)
106
107    if resize_inputs:
108        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
109        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
110            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
111        )
112
113    return torch_em.default_segmentation_dataset(
114        raw_paths=image_paths,
115        raw_key=None,
116        label_paths=gt_paths,
117        label_key=None,
118        patch_shape=patch_shape,
119        is_seg_dataset=False,
120        **kwargs
121    )
122
123
def get_piccolo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "validation", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the PICCOLO dataloader for polyp segmentation in narrow band imaging colonoscopy images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    return torch_em.get_data_loader(
        get_piccolo_dataset(path, patch_shape, split, resize_inputs, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs
    )

def get_piccolo_data(path: Union[os.PathLike, str], download: bool = False) -> str: View Source

def get_piccolo_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Get the PICCOLO dataset.

    The database is located at:
    - https://www.biobancovasco.bioef.eus/en/Sample-and-data-e-catalog/Databases/PD178-PICCOLO-EN1.html

    Follow the instructions below to get access to the dataset.
    - Visit the website above.
    - Fill out the access request form: https://labur.eus/EzJUN
    - Send an email to Basque Biobank at solicitudes.biobancovasco@bioef.eus, requesting access to the dataset.
    - The team will ask you to follow up with some formalities.
    - Then, you will gain access to the ".rar" file.
    - Finally, place the rar file ("piccolo dataset_widefield-release0.1.rar") in the folder
      you pass as `path`, and you should be good to go.

    Args:
        path: Filepath to the folder that contains the manually downloaded rar file.
            The archive is extracted into this folder.
        download: Whether to download the data if it is not present.
            Automatic download is not supported for this dataset, so this flag only
            decides which error is raised when the rar file is missing.

    Returns:
        Filepath to the "piccolo dataset-release0.1" folder inside `path`.

    Raises:
        NotImplementedError: If `download` is True and neither the data folder nor the rar file is present.
        FileNotFoundError: If `download` is False and neither the data folder nor the rar file is present.
    """
    data_dir = os.path.join(path, r"piccolo dataset-release0.1")
    if os.path.exists(data_dir):
        # The data folder is already there, nothing to do.
        return data_dir

    # Look for the manually downloaded archive BEFORE evaluating 'download':
    # if the archive is available it can be extracted regardless of the flag.
    rar_file = os.path.join(path, r"piccolo dataset_widefield-release0.1.rar")
    if not os.path.exists(rar_file):
        if download:
            raise NotImplementedError(
                "Automatic download is not possible for this dataset. See 'get_piccolo_data' for details."
            )
        raise FileNotFoundError(
            "You must download the PICCOLO dataset from the Basque Biobank, see 'get_piccolo_data' for details."
        )

    # Extract next to the archive; 'remove=False' is passed so the archive is kept.
    util.unzip_rarfile(rar_path=rar_file, dst=path, remove=False)
    return data_dir

Get the PICCOLO dataset.

The database is located at:

https://www.biobancovasco.bioef.eus/en/Sample-and-data-e-catalog/Databases/PD178-PICCOLO-EN1.html

Follow the instructions below to get access to the dataset.

Visit the attached website above
Fill out the access request form: https://labur.eus/EzJUN
Send an email to Basque Biobank at solicitudes.biobancovasco@bioef.eus, requesting access to the dataset.
The team will ask you to follow up with some formalities.
Then, you will gain access to the ".rar" file.
Finally, provide the path where the rar file is stored, and you should be good to go.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
download: Whether to download the data if it is not present.

Returns:

Filepath where the data is downloaded.

def get_piccolo_paths( path: Union[os.PathLike, str], split: Literal['train', 'validation', 'test'], download: bool = False) -> Tuple[List[str], List[str]]: View Source

def get_piccolo_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'validation', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the PICCOLO data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split. One of 'train', 'validation' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of the supported splits.
        RuntimeError: If the number of images and the number of masks found for the split differ.
    """
    valid_splits = ("train", "validation", "test")
    if split not in valid_splits:
        # Fail early: an unknown split would otherwise just glob to two empty lists.
        raise ValueError(f"'{split}' is not a valid split. Please choose one of {valid_splits}.")

    data_dir = get_piccolo_data(path, download)

    # Images and masks are collected from sibling folders of the split and
    # sorted naturally so that both lists follow the same order.
    image_paths = natsorted(glob(os.path.join(data_dir, split, "polyps", "*")))
    gt_paths = natsorted(glob(os.path.join(data_dir, split, "masks", "*")))

    if len(image_paths) != len(gt_paths):
        # The lists are paired by position, so unequal lengths mean wrong pairs.
        raise RuntimeError(
            f"Found {len(image_paths)} images but {len(gt_paths)} masks for the '{split}' split in '{data_dir}'."
        )

    return image_paths, gt_paths

Get paths to the PICCOLO data.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
split: The choice of data split.
download: Whether to download the data if it is not present.

Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_piccolo_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'validation', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

 85def get_piccolo_dataset(
 86    path: Union[os.PathLike, str],
 87    patch_shape: Tuple[int, int],
 88    split: Literal["train", "validation", "test"],
 89    resize_inputs: bool = False,
 90    download: bool = False,
 91    **kwargs
 92) -> Dataset:
 93    """Get the PICCOLO dataset for polyp segmentation in narrow band imaging colonoscopy images.
 94
 95    Args:
 96        path: Filepath to a folder where the data is downloaded for further processing.
 97        patch_shape: The patch shape to use for training.
 98        split: The choice of data split.
 99        resize_inputs: Whether to resize inputs to the desired patch shape.
100        download: Whether to download the data if it is not present.
101        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
102
103    Returns:
104        The segmentation dataset.
105    """
106    image_paths, gt_paths = get_piccolo_paths(path, split, download)
107
108    if resize_inputs:
109        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
110        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
111            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
112        )
113
114    return torch_em.default_segmentation_dataset(
115        raw_paths=image_paths,
116        raw_key=None,
117        label_paths=gt_paths,
118        label_key=None,
119        patch_shape=patch_shape,
120        is_seg_dataset=False,
121        **kwargs
122    )

Get the PICCOLO dataset for polyp segmentation in narrow band imaging colonoscopy images.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
patch_shape: The patch shape to use for training.
split: The choice of data split.
resize_inputs: Whether to resize inputs to the desired patch shape.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_piccolo_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'validation', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_piccolo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "validation", "test"],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the PICCOLO dataloader for polyp segmentation in narrow band imaging colonoscopy images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    return torch_em.get_data_loader(
        get_piccolo_dataset(path, patch_shape, split, resize_inputs, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs
    )

Get the PICCOLO dataloader for polyp segmentation in narrow band imaging colonoscopy images.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
batch_size: The batch size for training.
patch_shape: The patch shape to use for training.
split: The choice of data split.
resize_inputs: Whether to resize inputs to the desired patch shape.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.