torch_em.data.datasets.medical.busi
import os
from glob import glob
from typing import Union, Tuple, Optional

import torch_em
from torch_em.transform.generic import ResizeInputs

from .. import util
from ... import ImageCollectionDataset


URL = "https://scholar.cu.edu.eg/Dataset_BUSI.zip"
CHECKSUM = "b2ce09f6063a31a73f628b6a6ee1245187cbaec225e93e563735691d68654de7"


def get_busi_data(path, download):
    """Make the BUSI data available below `path` and return the data directory.

    Args:
        path: Directory that holds (or will hold) the data; created if missing.
        download: Forwarded to `util.download_source`.

    Returns:
        The path `<path>/Dataset_BUSI_with_GT`.
    """
    os.makedirs(path, exist_ok=True)

    data_dir = os.path.join(path, "Dataset_BUSI_with_GT")
    if os.path.exists(data_dir):
        # Already unpacked: skip the download and extraction.
        return data_dir

    zip_path = os.path.join(path, "Dataset_BUSI.zip")
    # NOTE(review): verify=False presumably disables certificate verification for this host - confirm.
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM, verify=False)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def _get_busi_paths(path, category, download):
    """Collect the sorted image and mask file paths for one category (or all).

    Args:
        path: Directory passed on to `get_busi_data`.
        category: Name of the category sub-folder; None matches every sub-folder.
        download: Passed on to `get_busi_data`.

    Returns:
        A tuple `(image_paths, gt_paths)` of sorted lists.
    """
    data_dir = get_busi_data(path=path, download=download)

    if category is None:
        category = "*"

    data_dir = os.path.join(data_dir, category)

    # Images end in ").png" and masks in ")_mask.png"; other suffixes are not matched.
    image_paths = sorted(glob(os.path.join(data_dir, r"*).png")))
    gt_paths = sorted(glob(os.path.join(data_dir, r"*)_mask.png")))

    return image_paths, gt_paths


def get_busi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    category: Optional[str] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Dataset for segmentation of breast cancer in ultrasound images.

    This database is located at https://scholar.cu.edu.eg/?q=afahmy/pages/dataset

    The dataset is from Al-Dhabyani et al. - https://doi.org/10.1016/j.dib.2019.104863
    Please cite it if you use this dataset for a publication.

    Args:
        path: Directory that holds (or will hold) the data.
        patch_shape: Patch shape handed to the dataset. With `resize_inputs=True` it is
            used as the target shape of the resize transforms instead.
        category: One of "normal", "benign" or "malignant". None selects all categories.
        resize_inputs: Whether to attach `ResizeInputs` transforms for images and labels.
        download: Passed on to the download step.
        kwargs: Additional keyword arguments for `ImageCollectionDataset`.

    Returns:
        The `ImageCollectionDataset` built from the matched image and mask paths.
    """
    if category is not None:
        assert category in ["normal", "benign", "malignant"], \
            f"Invalid category '{category}', choose one of 'normal', 'benign' or 'malignant'."

    image_paths, gt_paths = _get_busi_paths(path=path, category=category, download=download)

    raw_trafo, label_trafo = None, None
    if resize_inputs:
        # `patch_shape` becomes the resize target, and the dataset itself gets no patch shape.
        raw_trafo = ResizeInputs(target_shape=patch_shape, is_rgb=True)
        label_trafo = ResizeInputs(target_shape=patch_shape, is_label=True)
        patch_shape = None

    dataset = ImageCollectionDataset(
        raw_image_paths=image_paths,
        label_image_paths=gt_paths,
        patch_shape=patch_shape,
        raw_transform=raw_trafo,
        label_transform=label_trafo,
        **kwargs
    )

    return dataset


def get_busi_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    category: Optional[str] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Dataloader for segmentation of breast cancer in ultrasound images. See `get_busi_dataset` for details.
    """
    # Split the extra keyword arguments into those for the dataset and those for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_busi_dataset(
        path=path,
        patch_shape=patch_shape,
        category=category,
        resize_inputs=resize_inputs,
        download=download,
        **ds_kwargs
    )
    loader = torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
    return loader
URL =
'https://scholar.cu.edu.eg/Dataset_BUSI.zip'
CHECKSUM =
'b2ce09f6063a31a73f628b6a6ee1245187cbaec225e93e563735691d68654de7'
def
get_busi_data(path, download):
def get_busi_data(path, download):
    """Return the BUSI data directory below `path`, fetching the data if it is missing.

    Args:
        path: Directory that holds (or will hold) the data; created if missing.
        download: Forwarded to `util.download_source`.

    Returns:
        The path `<path>/Dataset_BUSI_with_GT`.
    """
    os.makedirs(path, exist_ok=True)
    extracted_dir = os.path.join(path, "Dataset_BUSI_with_GT")

    # Only download and unpack when the extracted folder is not there yet.
    if not os.path.exists(extracted_dir):
        archive = os.path.join(path, "Dataset_BUSI.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM, verify=False)
        util.unzip(zip_path=archive, dst=path)

    return extracted_dir
def
get_busi_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], category: Optional[str] = None, resize_inputs: bool = False, download: bool = False, **kwargs):
def get_busi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    category: Optional[str] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Dataset for segmentation of breast cancer in ultrasound images.

    This database is located at https://scholar.cu.edu.eg/?q=afahmy/pages/dataset

    The dataset is from Al-Dhabyani et al. - https://doi.org/10.1016/j.dib.2019.104863
    Please cite it if you use this dataset for a publication.

    Args:
        path: Directory that holds (or will hold) the data.
        patch_shape: Patch shape handed to the dataset. With `resize_inputs=True` it is
            used as the target shape of the resize transforms instead.
        category: One of "normal", "benign" or "malignant". None selects all categories.
        resize_inputs: Whether to attach `ResizeInputs` transforms for images and labels.
        download: Passed on to the download step.
        kwargs: Additional keyword arguments for `ImageCollectionDataset`.

    Returns:
        The `ImageCollectionDataset` built from the matched image and mask paths.
    """
    if category is not None:
        assert category in ["normal", "benign", "malignant"], \
            f"Invalid category '{category}', choose one of 'normal', 'benign' or 'malignant'."

    image_paths, gt_paths = _get_busi_paths(path=path, category=category, download=download)

    raw_trafo, label_trafo = None, None
    if resize_inputs:
        # `patch_shape` becomes the resize target, and the dataset itself gets no patch shape.
        raw_trafo = ResizeInputs(target_shape=patch_shape, is_rgb=True)
        label_trafo = ResizeInputs(target_shape=patch_shape, is_label=True)
        patch_shape = None

    dataset = ImageCollectionDataset(
        raw_image_paths=image_paths,
        label_image_paths=gt_paths,
        patch_shape=patch_shape,
        raw_transform=raw_trafo,
        label_transform=label_trafo,
        **kwargs
    )

    return dataset
Dataset for segmentation of breast cancer in ultrasound images.
This database is located at https://scholar.cu.edu.eg/?q=afahmy/pages/dataset
The dataset is from Al-Dhabyani et al. - https://doi.org/10.1016/j.dib.2019.104863 Please cite it if you use this dataset for a publication.
def
get_busi_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, category: Optional[str] = None, resize_inputs: bool = False, download: bool = False, **kwargs):
def get_busi_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    category: Optional[str] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Dataloader for segmentation of breast cancer in ultrasound images.

    The dataset is created with `get_busi_dataset`; see that function for the meaning of
    `path`, `patch_shape`, `category`, `resize_inputs` and `download`.

    Args:
        batch_size: Batch size passed to `torch_em.get_data_loader`.
        kwargs: Extra keyword arguments, split between the dataset and the loader.

    Returns:
        The loader returned by `torch_em.get_data_loader`.
    """
    # Split the extra keyword arguments into those for the dataset and those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    busi_dataset = get_busi_dataset(
        path=path,
        patch_shape=patch_shape,
        category=category,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(dataset=busi_dataset, batch_size=batch_size, **dataloader_kwargs)
Dataloader for segmentation of breast cancer in ultrasound images. See get_busi_dataset
for details.