torch_em.data.datasets.medical.busi

The BUSI dataset contains annotations for breast cancer segmentation in ultrasound images.

This dataset is located at https://scholar.cu.edu.eg/?q=afahmy/pages/dataset. The dataset is from the publication https://doi.org/10.1016/j.dib.2019.104863. Please cite it if you use this dataset for a publication.

  1"""The BUSI dataset contains annotations for breast cancer segmentation in ultrasound images.
  2
  3This dataset is located at https://scholar.cu.edu.eg/?q=afahmy/pages/dataset.
  4The dataset is from the publication https://doi.org/10.1016/j.dib.2019.104863.
  5Please cite it if you use this dataset for a publication.
  6"""
  7
  8import os
  9from glob import glob
 10from typing import Union, Tuple, Optional, Literal, List
 11
 12from torch.utils.data import Dataset, DataLoader
 13
 14import torch_em
 15
 16from .. import util
 17
 18
 19URL = "https://scholar.cu.edu.eg/Dataset_BUSI.zip"  # Download location of the zipped BUSI dataset.
 20CHECKSUM = "b2ce09f6063a31a73f628b6a6ee1245187cbaec225e93e563735691d68654de7"  # Checksum handed to the download helper (64 hex characters, presumably SHA-256).
 21
 22
 23def get_busi_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 24    """Download the BUSI dataset.
 25
 26    Args:
 27        path: Filepath to a folder where the data is downloaded for further processing.
 28        download: Whether to download the data if it is not present.
 29
 30    Returns:
 31        Filepath where the data is downloaded.
 32    """
 33    data_dir = os.path.join(path, "Dataset_BUSI_with_GT")
 34    if os.path.exists(data_dir):
 35        return data_dir
 36
 37    os.makedirs(path, exist_ok=True)
 38
 39    zip_path = os.path.join(path, "Dataset_BUSI.zip")
 40    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM, verify=False)
 41    util.unzip(zip_path=zip_path, dst=path)
 42
 43    return data_dir
 44
 45
 46def get_busi_paths(
 47    path: Union[os.PathLike, str],
 48    category: Optional[Literal["normal", "benign", "malignant"]] = None,
 49    download: bool = False
 50) -> Tuple[List[str], List[str]]:
 51    """Get paths to the BUSI data.
 52
 53    Args:
 54        path: Filepath to a folder where the data is downloaded for further processing.
 55        category: The choice of data sub-category.
 56        download: Whether to download the data if it is not present.
 57
 58    Returns:
 59        List of filepaths for the image data.
 60        List of filepaths for the label data.
 61    """
 62    data_dir = get_busi_data(path=path, download=download)
 63
 64    if category is None:
 65        category = "*"
 66    else:
 67        if category not in ["normal", "benign", "malignant"]:
 68            raise ValueError(f"'{category}' is not a valid category choice.")
 69
 70    data_dir = os.path.join(data_dir, category)
 71
 72    image_paths = sorted(glob(os.path.join(data_dir, r"*).png")))
 73    gt_paths = sorted(glob(os.path.join(data_dir, r"*)_mask.png")))
 74
 75    return image_paths, gt_paths
 76
 77
 78def get_busi_dataset(
 79    path: Union[os.PathLike, str],
 80    patch_shape: Tuple[int, int],
 81    category: Optional[Literal["normal", "benign", "malignant"]] = None,
 82    resize_inputs: bool = False,
 83    download: bool = False,
 84    **kwargs
 85) -> Dataset:
 86    """Get the BUSI dataset for breast cancer segmentation.
 87
 88    Args:
 89        path: Filepath to a folder where the data is downloaded for further processing.
 90        patch_shape: The patch shape to use for training.
 91        category: The choice of data sub-category.
 92        resize_inputs: Whether to resize the inputs.
 93        download: Whether to download the data if it is not present.
 94        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 95
 96    Returns:
 97        The segmentation dataset.
 98    """
 99    image_paths, gt_paths = get_busi_paths(path, category, download)
100
101    if resize_inputs:
102        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
103        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
104            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
105        )
106
107    return torch_em.default_segmentation_dataset(
108        raw_paths=image_paths,
109        raw_key=None,
110        label_paths=gt_paths,
111        label_key=None,
112        patch_shape=patch_shape,
113        **kwargs
114    )
115
116
def get_busi_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the BUSI dataloader for breast cancer segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        category: The choice of data sub-category.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the ones meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_busi_dataset(
        path=path,
        patch_shape=patch_shape,
        category=category,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)
# Download location of the zipped BUSI dataset.
URL = 'https://scholar.cu.edu.eg/Dataset_BUSI.zip'
# Checksum handed to the download helper for the archive (64 hex characters, presumably SHA-256).
CHECKSUM = 'b2ce09f6063a31a73f628b6a6ee1245187cbaec225e93e563735691d68654de7'
def get_busi_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_busi_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make the BUSI data available locally and return its location.

    An already extracted copy of the data is reused. Otherwise the archive is
    fetched into `path` and unpacked there.

    Args:
        path: Filepath to the folder where the data is stored for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the folder that holds the extracted data.
    """
    extracted_dir = os.path.join(path, "Dataset_BUSI_with_GT")

    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)

        archive = os.path.join(path, "Dataset_BUSI.zip")
        # NOTE(review): `verify=False` presumably switches off certificate verification
        # for the download; the checksum is still handed to the helper - confirm.
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM, verify=False)
        util.unzip(zip_path=archive, dst=path)

    return extracted_dir

Download the BUSI dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def get_busi_paths( path: Union[os.PathLike, str], category: Optional[Literal['normal', 'benign', 'malignant']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_busi_paths(
    path: Union[os.PathLike, str],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the BUSI data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        category: The choice of data sub-category, one of 'normal', 'benign' or 'malignant'.
            By default, the data of all sub-categories is used.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `category` is not one of the supported sub-categories.
    """
    # Reject an invalid category first, so that a bad argument never triggers the download.
    if category is not None and category not in ("normal", "benign", "malignant"):
        raise ValueError(f"'{category}' is not a valid category choice.")

    data_dir = get_busi_data(path=path, download=download)

    # Without a category, the wildcard matches every sub-folder of the dataset.
    category_dir = os.path.join(data_dir, "*" if category is None else category)

    # Images are matched by the suffix ').png' and labels by the suffix ')_mask.png'.
    # Both lists are sorted so that they are returned in a reproducible order.
    image_paths = sorted(glob(os.path.join(category_dir, r"*).png")))
    gt_paths = sorted(glob(os.path.join(category_dir, r"*)_mask.png")))

    return image_paths, gt_paths

Get paths to the BUSI data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • category: The choice of data sub-category: 'normal', 'benign' or 'malignant'. By default (None), the data from all sub-categories is used.
  • download: Whether to download the data if it is not present.
Returns:

A pair of lists: the filepaths for the image data, followed by the filepaths for the label data.

def get_busi_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], category: Optional[Literal['normal', 'benign', 'malignant']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 79def get_busi_dataset(
 80    path: Union[os.PathLike, str],
 81    patch_shape: Tuple[int, int],
 82    category: Optional[Literal["normal", "benign", "malignant"]] = None,
 83    resize_inputs: bool = False,
 84    download: bool = False,
 85    **kwargs
 86) -> Dataset:
 87    """Get the BUSI dataset for breast cancer segmentation.
 88
 89    Args:
 90        path: Filepath to a folder where the data is downloaded for further processing.
 91        patch_shape: The patch shape to use for training.
 92        category: The choice of data sub-category.
 93        resize_inputs: Whether to resize the inputs.
 94        download: Whether to download the data if it is not present.
 95        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 96
 97    Returns:
 98        The segmentation dataset.
 99    """
100    image_paths, gt_paths = get_busi_paths(path, category, download)
101
102    if resize_inputs:
103        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
104        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
105            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
106        )
107
108    return torch_em.default_segmentation_dataset(
109        raw_paths=image_paths,
110        raw_key=None,
111        label_paths=gt_paths,
112        label_key=None,
113        patch_shape=patch_shape,
114        **kwargs
115    )

Get the BUSI dataset for breast cancer segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • category: The choice of data sub-category: 'normal', 'benign' or 'malignant'. By default (None), the data from all sub-categories is used.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_busi_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], category: Optional[Literal['normal', 'benign', 'malignant']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_busi_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the BUSI dataloader for breast cancer segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        category: The choice of data sub-category.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the ones meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_busi_dataset(
        path=path,
        patch_shape=patch_shape,
        category=category,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)

Get the BUSI dataloader for breast cancer segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • category: The choice of data sub-category: 'normal', 'benign' or 'malignant'. By default (None), the data from all sub-categories is used.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.