torch_em.data.datasets.medical.busi
The BUSI dataset contains annotations for breast cancer segmentation in ultrasound images.
This dataset is located at https://scholar.cu.edu.eg/?q=afahmy/pages/dataset. The dataset is from the publication https://doi.org/10.1016/j.dib.2019.104863. Please cite it if you use this dataset for a publication.
"""The BUSI dataset contains annotations for breast cancer segmentation in ultrasound images.

This dataset is located at https://scholar.cu.edu.eg/?q=afahmy/pages/dataset.
The dataset is from the publication https://doi.org/10.1016/j.dib.2019.104863.
Please cite it if you use this dataset for a publication.
"""

import os
from glob import glob
from typing import Union, Tuple, Optional, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://scholar.cu.edu.eg/Dataset_BUSI.zip"
CHECKSUM = "b2ce09f6063a31a73f628b6a6ee1245187cbaec225e93e563735691d68654de7"


def get_busi_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the BUSI dataset.

    Nothing is downloaded when the extracted `Dataset_BUSI_with_GT` folder already exists in `path`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    data_dir = os.path.join(path, "Dataset_BUSI_with_GT")
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "Dataset_BUSI.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM, verify=False)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def get_busi_paths(
    path: Union[os.PathLike, str],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the BUSI data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        category: The choice of data sub-category. If None, the files of all sub-folders are returned.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `category` is given but is not one of "normal", "benign" or "malignant".
    """
    # Reject an invalid category first, so that a bad argument never triggers a download.
    if category is not None and category not in ["normal", "benign", "malignant"]:
        raise ValueError(f"'{category}' is not a valid category choice.")

    data_dir = get_busi_data(path=path, download=download)

    # Without an explicit category the wildcard matches every sub-folder.
    data_dir = os.path.join(data_dir, "*" if category is None else category)

    # Only names ending in ").png" (images) or ")_mask.png" (labels) are collected.
    image_paths = sorted(glob(os.path.join(data_dir, r"*).png")))
    gt_paths = sorted(glob(os.path.join(data_dir, r"*)_mask.png")))

    return image_paths, gt_paths


def get_busi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the BUSI dataset for breast cancer segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        category: The choice of data sub-category.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_busi_paths(path, category, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )


def get_busi_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the BUSI dataloader for breast cancer segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        category: The choice of data sub-category.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_busi_dataset(path, patch_shape, category, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://scholar.cu.edu.eg/Dataset_BUSI.zip'
CHECKSUM =
'b2ce09f6063a31a73f628b6a6ee1245187cbaec225e93e563735691d68654de7'
def
get_busi_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_busi_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the BUSI data is available locally and return its location.

    When the extracted `Dataset_BUSI_with_GT` folder already exists in `path`, it is returned
    right away and nothing is downloaded.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the extracted `Dataset_BUSI_with_GT` folder inside `path`.
    """
    extracted_dir = os.path.join(path, "Dataset_BUSI_with_GT")

    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "Dataset_BUSI.zip")
        # NOTE(review): verify=False presumably disables certificate verification for the
        # request; the expected checksum is passed along as well - confirm in util.download_source.
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM, verify=False)
        util.unzip(zip_path=archive, dst=path)

    return extracted_dir
Download the BUSI dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def
get_busi_paths( path: Union[os.PathLike, str], category: Optional[Literal['normal', 'benign', 'malignant']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_busi_paths(
    path: Union[os.PathLike, str],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the BUSI data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        category: The choice of data sub-category. If None, the files of all sub-folders are returned.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `category` is given but is not one of "normal", "benign" or "malignant".
    """
    # Reject an invalid category first, so that a bad argument never triggers a download.
    if category is not None and category not in ["normal", "benign", "malignant"]:
        raise ValueError(f"'{category}' is not a valid category choice.")

    data_dir = get_busi_data(path=path, download=download)

    # Without an explicit category the wildcard matches every sub-folder.
    data_dir = os.path.join(data_dir, "*" if category is None else category)

    # Only names ending in ").png" (images) or ")_mask.png" (labels) are collected.
    image_paths = sorted(glob(os.path.join(data_dir, r"*).png")))
    gt_paths = sorted(glob(os.path.join(data_dir, r"*)_mask.png")))

    return image_paths, gt_paths
Get paths to the BUSI data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- category: The choice of data sub-category.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data, and list of filepaths for the label data.
def
get_busi_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], category: Optional[Literal['normal', 'benign', 'malignant']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_busi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the segmentation dataset for the BUSI breast cancer ultrasound data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        category: The choice of data sub-category.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_busi_paths(path, category, download)

    if resize_inputs:
        # The helper returns both the updated keyword arguments and the patch shape to use.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths, raw_key=None, label_paths=label_paths, label_key=None, patch_shape=patch_shape, **kwargs
    )
    return dataset
Get the BUSI dataset for breast cancer segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- category: The choice of data sub-category.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_busi_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], category: Optional[Literal['normal', 'benign', 'malignant']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_busi_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the dataloader for the BUSI breast cancer ultrasound data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        category: The choice of data sub-category.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining (loader) ones.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    return torch_em.get_data_loader(
        get_busi_dataset(
            path=path,
            patch_shape=patch_shape,
            category=category,
            resize_inputs=resize_inputs,
            download=download,
            **dataset_kwargs
        ),
        batch_size,
        **dataloader_kwargs
    )
Get the BUSI dataloader for breast cancer segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- category: The choice of data sub-category.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.