torch_em.data.datasets.medical.busi
The BUSI dataset contains annotations for breast cancer segmentation in ultrasound images.
This dataset is located at https://scholar.cu.edu.eg/?q=afahmy/pages/dataset. The dataset is from the publication https://doi.org/10.1016/j.dib.2019.104863. Please cite it if you use this dataset for a publication.
"""The BUSI dataset contains annotations for breast cancer segmentation in ultrasound images.

This dataset is located at https://scholar.cu.edu.eg/?q=afahmy/pages/dataset.
The dataset is from the publication https://doi.org/10.1016/j.dib.2019.104863.
Please cite it if you use this dataset for a publication.
"""

import os
from glob import glob
from typing import Union, Tuple, Optional, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://scholar.cu.edu.eg/Dataset_BUSI.zip"
CHECKSUM = "b2ce09f6063a31a73f628b6a6ee1245187cbaec225e93e563735691d68654de7"


def get_busi_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the BUSI dataset.

    If the folder `Dataset_BUSI_with_GT` already exists inside `path`, nothing is
    downloaded and that folder is returned right away.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath to the `Dataset_BUSI_with_GT` folder inside `path`.
    """
    data_dir = os.path.join(path, "Dataset_BUSI_with_GT")
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "Dataset_BUSI.zip")
    # NOTE(review): `verify=False` is forwarded to `util.download_source`; presumably it
    # relaxes certificate verification for the host - confirm. The checksum is still passed.
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM, verify=False)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def get_busi_paths(
    path: Union[os.PathLike, str],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the BUSI data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        category: The choice of data sub-category. By default (None), all sub-categories are used.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `category` is not one of 'normal', 'benign', 'malignant' or None.
    """
    # Reject an invalid category up front, so that a typo can never trigger the download.
    if category is not None and category not in ["normal", "benign", "malignant"]:
        raise ValueError(f"'{category}' is not a valid category choice.")

    data_dir = get_busi_data(path=path, download=download)

    # Without an explicit category, a wildcard matches every sub-folder.
    data_dir = os.path.join(data_dir, "*" if category is None else category)

    # Images are the files ending in ').png', labels the files ending in ')_mask.png'.
    image_paths = sorted(glob(os.path.join(data_dir, r"*).png")))
    gt_paths = sorted(glob(os.path.join(data_dir, r"*)_mask.png")))

    return image_paths, gt_paths


def get_busi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the BUSI dataset for breast cancer segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        category: The choice of data sub-category.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_busi_paths(path, category, download)

    if resize_inputs:
        # The helper returns updated dataset kwargs together with the patch shape to use.
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )


def get_busi_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the BUSI dataloader for breast cancer segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        category: The choice of data sub-category.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Split the extra keyword arguments between the dataset factory and the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_busi_dataset(path, patch_shape, category, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://scholar.cu.edu.eg/Dataset_BUSI.zip'
CHECKSUM =
'b2ce09f6063a31a73f628b6a6ee1245187cbaec225e93e563735691d68654de7'
def
get_busi_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_busi_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the BUSI data is available locally and return its location.

    The data lives in the folder `Dataset_BUSI_with_GT` inside `path`. If that folder
    already exists, nothing else is done. Otherwise `path` is created if needed, the
    archive `Dataset_BUSI.zip` is fetched into it and then unpacked there.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath to the `Dataset_BUSI_with_GT` folder inside `path`.
    """
    extracted_root = os.path.join(path, "Dataset_BUSI_with_GT")

    if not os.path.exists(extracted_root):
        os.makedirs(path, exist_ok=True)

        archive = os.path.join(path, "Dataset_BUSI.zip")
        # NOTE(review): `verify=False` is forwarded to `util.download_source`; presumably it
        # relaxes certificate verification for the host - confirm. The checksum is still passed.
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM, verify=False)
        util.unzip(zip_path=archive, dst=path)

    return extracted_root
Download the BUSI dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath to the `Dataset_BUSI_with_GT` folder inside `path`, where the data is stored.
def
get_busi_paths( path: Union[os.PathLike, str], category: Optional[Literal['normal', 'benign', 'malignant']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_busi_paths(
    path: Union[os.PathLike, str],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the BUSI data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        category: The choice of data sub-category. By default (None), all sub-categories are used.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `category` is not one of 'normal', 'benign', 'malignant' or None.
    """
    # Reject an invalid category up front, so that a typo can never trigger the download.
    if category is not None and category not in ("normal", "benign", "malignant"):
        raise ValueError(f"'{category}' is not a valid category choice.")

    data_dir = get_busi_data(path=path, download=download)

    # Without an explicit category, a wildcard matches every sub-folder.
    search_root = os.path.join(data_dir, "*" if category is None else category)

    # Images are the files ending in ').png', labels the files ending in ')_mask.png'.
    # Both lists are sorted.
    image_paths = sorted(glob(os.path.join(search_root, r"*).png")))
    gt_paths = sorted(glob(os.path.join(search_root, r"*)_mask.png")))

    return image_paths, gt_paths
Get paths to the BUSI data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- category: The choice of data sub-category.
- download: Whether to download the data if it is not present.
Returns:
A tuple of two sorted lists: the filepaths for the image data and the filepaths for the label data.
def
get_busi_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], category: Optional[Literal['normal', 'benign', 'malignant']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_busi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the segmentation dataset for the BUSI breast cancer ultrasound data.

    The image and label files are looked up with `get_busi_paths` and handed to
    `torch_em.default_segmentation_dataset`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        category: The choice of data sub-category.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_files, mask_files = get_busi_paths(path, category, download)

    if resize_inputs:
        # The helper returns updated dataset kwargs together with the patch shape to use.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_files, raw_key=None, label_paths=mask_files, label_key=None, patch_shape=patch_shape, **kwargs
    )
    return dataset
Get the BUSI dataset for breast cancer segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- category: The choice of data sub-category.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_busi_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], category: Optional[Literal['normal', 'benign', 'malignant']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_busi_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    category: Optional[Literal["normal", "benign", "malignant"]] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the dataloader for the BUSI breast cancer ultrasound data.

    The extra keyword arguments are split with `util.split_kwargs`: one part is passed on
    to `get_busi_dataset`, the other part to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        category: The choice of data sub-category.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    busi_dataset = get_busi_dataset(
        path=path,
        patch_shape=patch_shape,
        category=category,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs,
    )

    return torch_em.get_data_loader(busi_dataset, batch_size, **dataloader_kwargs)
Get the BUSI dataloader for breast cancer segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- category: The choice of data sub-category.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.