torch_em.data.datasets.medical.busi

The BUSI dataset contains annotations for breast cancer segmentation in ultrasound images.

This dataset is located at https://scholar.cu.edu.eg/?q=afahmy/pages/dataset. The dataset is from the publication https://doi.org/10.1016/j.dib.2019.104863. Please cite it if you use this dataset for a publication.

  1"""The BUSI dataset contains annotations for breast cancer segmentation in ultrasound images.
  2
  3This dataset is located at https://scholar.cu.edu.eg/?q=afahmy/pages/dataset.
  4The dataset is from the publication https://doi.org/10.1016/j.dib.2019.104863.
  5Please cite it if you use this dataset for a publication.
  6"""
  7
  8import os
  9from glob import glob
 10from typing import Union, Tuple, Optional, Literal, List
 11
 12from torch.utils.data import Dataset, DataLoader
 13
 14import torch_em
 15
 16from .. import util
 17
 18
 19URL = "https://scholar.cu.edu.eg/Dataset_BUSI.zip"
 20CHECKSUM = "b2ce09f6063a31a73f628b6a6ee1245187cbaec225e93e563735691d68654de7"
 21
 22
 23def get_busi_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 24    """Download the BUSI dataset.
 25
 26    Args:
 27        path: Filepath to a folder where the data is downloaded for further processing.
 28        download: Whether to download the data if it is not present.
 29
 30    Returns:
 31        Filepath where the data is downloaded.
 32    """
 33    data_dir = os.path.join(path, "Dataset_BUSI_with_GT")
 34    if os.path.exists(data_dir):
 35        return data_dir
 36
 37    os.makedirs(path, exist_ok=True)
 38
 39    zip_path = os.path.join(path, "Dataset_BUSI.zip")
 40    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM, verify=False)
 41    util.unzip(zip_path=zip_path, dst=path)
 42
 43    return data_dir
 44
 45
 46def get_busi_paths(
 47    path: Union[os.PathLike, str],
 48    category: Optional[Literal["normal", "benign", "malignant"]] = None,
 49    download: bool = False
 50) -> Tuple[List[str], List[str]]:
 51    """Get paths to the BUSI data.
 52
 53    Args:
 54        path: Filepath to a folder where the data is downloaded for further processing.
 55        category: The choice of data sub-category.
 56        download: Whether to download the data if it is not present.
 57
 58    Returns:
 59        Filepath where the data is downloaded.
 60    """
 61    data_dir = get_busi_data(path=path, download=download)
 62
 63    if category is None:
 64        category = "*"
 65    else:
 66        if category not in ["normal", "benign", "malignant"]:
 67            raise ValueError(f"'{category}' is not a valid category choice.")
 68
 69    data_dir = os.path.join(data_dir, category)
 70
 71    image_paths = sorted(glob(os.path.join(data_dir, r"*).png")))
 72    gt_paths = sorted(glob(os.path.join(data_dir, r"*)_mask.png")))
 73
 74    return image_paths, gt_paths
 75
 76
 77def get_busi_dataset(
 78    path: Union[os.PathLike, str],
 79    patch_shape: Tuple[int, int],
 80    category: Optional[Literal["normal", "benign", "malignant"]] = None,
 81    resize_inputs: bool = False,
 82    download: bool = False,
 83    **kwargs
 84) -> Dataset:
 85    """Get the BUSI dataset for breast cancer segmentation.
 86
 87    Args:
 88        path: Filepath to a folder where the data is downloaded for further processing.
 89        patch_shape: The patch shape to use for training.
 90        category: The choice of data sub-category.
 91        resize_inputs: Whether to resize the inputs.
 92        download: Whether to download the data if it is not present.
 93        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 94
 95    Returns:
 96        The segmentation dataset.
 97    """
 98    image_paths, gt_paths = get_busi_paths(path, category, download)
 99
100    if resize_inputs:
101        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
102        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
103            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
104        )
105
106    return torch_em.default_segmentation_dataset(
107        raw_paths=image_paths,
108        raw_key=None,
109        label_paths=gt_paths,
110        label_key=None,
111        patch_shape=patch_shape,
112        **kwargs
113    )
114
115
def get_busi_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the dataloader for the BUSI breast ultrasound data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size handed to `torch_em.get_data_loader`.
        patch_shape: The patch shape to use for training.
        category: The choice of data sub-category.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the dataset arguments from the remaining (loader) arguments.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    busi_dataset = get_busi_dataset(
        path=path,
        patch_shape=patch_shape,
        category=category,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(busi_dataset, batch_size, **dataloader_kwargs)
URL = 'https://scholar.cu.edu.eg/Dataset_BUSI.zip'
CHECKSUM = 'b2ce09f6063a31a73f628b6a6ee1245187cbaec225e93e563735691d68654de7'
def get_busi_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_busi_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the BUSI data is available locally and return its location.

    If the folder "Dataset_BUSI_with_GT" already exists under `path`, it is returned
    right away. Otherwise the archive "Dataset_BUSI.zip" is obtained from `URL`
    and unpacked into `path`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath to the "Dataset_BUSI_with_GT" folder inside `path`.
    """
    extracted_dir = os.path.join(path, "Dataset_BUSI_with_GT")

    # Only fetch and unpack the archive when the extracted folder is missing.
    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)
        archive_path = os.path.join(path, "Dataset_BUSI.zip")
        # NOTE(review): verify=False presumably turns off certificate verification for
        # this host, with the checksum guarding integrity instead - confirm in `util`.
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM, verify=False)
        util.unzip(zip_path=archive_path, dst=path)

    return extracted_dir

Download the BUSI dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def get_busi_paths( path: Union[os.PathLike, str], category: Optional[Literal['normal', 'benign', 'malignant']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_busi_paths(
    path: Union[os.PathLike, str],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the BUSI data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        category: The choice of data sub-category, one of "normal", "benign" or "malignant".
            By default (None), the files of all sub-category folders are collected.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `category` is neither None nor one of the supported choices.
    """
    # Validate first, so that an invalid choice fails fast
    # instead of only after the data has been fetched.
    if category is not None and category not in ("normal", "benign", "malignant"):
        raise ValueError(f"'{category}' is not a valid category choice.")

    data_dir = get_busi_data(path=path, download=download)

    # A wildcard in place of the folder name matches every sub-category folder.
    category_dir = os.path.join(data_dir, "*" if category is None else category)

    # Images end in ").png" and labels in ")_mask.png";
    # files with any other suffix are matched by neither pattern.
    image_paths = sorted(glob(os.path.join(category_dir, r"*).png")))
    gt_paths = sorted(glob(os.path.join(category_dir, r"*)_mask.png")))

    return image_paths, gt_paths

Get paths to the BUSI data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • category: The choice of data sub-category.
  • download: Whether to download the data if it is not present.
Returns:

A list of filepaths to the images and a list of filepaths to the corresponding masks.

def get_busi_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], category: Optional[Literal['normal', 'benign', 'malignant']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 78def get_busi_dataset(
 79    path: Union[os.PathLike, str],
 80    patch_shape: Tuple[int, int],
 81    category: Optional[Literal["normal", "benign", "malignant"]] = None,
 82    resize_inputs: bool = False,
 83    download: bool = False,
 84    **kwargs
 85) -> Dataset:
 86    """Get the BUSI dataset for breast cancer segmentation.
 87
 88    Args:
 89        path: Filepath to a folder where the data is downloaded for further processing.
 90        patch_shape: The patch shape to use for training.
 91        category: The choice of data sub-category.
 92        resize_inputs: Whether to resize the inputs.
 93        download: Whether to download the data if it is not present.
 94        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 95
 96    Returns:
 97        The segmentation dataset.
 98    """
 99    image_paths, gt_paths = get_busi_paths(path, category, download)
100
101    if resize_inputs:
102        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
103        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
104            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
105        )
106
107    return torch_em.default_segmentation_dataset(
108        raw_paths=image_paths,
109        raw_key=None,
110        label_paths=gt_paths,
111        label_key=None,
112        patch_shape=patch_shape,
113        **kwargs
114    )

Get the BUSI dataset for breast cancer segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • category: The choice of data sub-category.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_busi_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], category: Optional[Literal['normal', 'benign', 'malignant']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_busi_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the dataloader for the BUSI breast ultrasound data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size handed to `torch_em.get_data_loader`.
        patch_shape: The patch shape to use for training.
        category: The choice of data sub-category.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the dataset arguments from the remaining (loader) arguments.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    busi_dataset = get_busi_dataset(
        path=path,
        patch_shape=patch_shape,
        category=category,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(busi_dataset, batch_size, **dataloader_kwargs)

Get the BUSI dataloader for breast cancer segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • category: The choice of data sub-category.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.