torch_em.data.datasets.medical.busi

  1import os
  2from glob import glob
  3from typing import Union, Tuple, Optional
  4
  5import torch_em
  6from torch_em.transform.generic import ResizeInputs
  7
  8from .. import util
  9from ... import ImageCollectionDataset
 10
 11
# Download location of the zipped BUSI dataset; handed to `util.download_source` by `get_busi_data`.
URL = "https://scholar.cu.edu.eg/Dataset_BUSI.zip"
# Expected checksum of the downloaded archive, handed to `util.download_source`.
# NOTE(review): 64 hex characters, presumably SHA-256 - confirm against `util.download_source`.
CHECKSUM = "b2ce09f6063a31a73f628b6a6ee1245187cbaec225e93e563735691d68654de7"
 14
 15
 16def get_busi_data(path, download):
 17    os.makedirs(path, exist_ok=True)
 18
 19    data_dir = os.path.join(path, "Dataset_BUSI_with_GT")
 20    if os.path.exists(data_dir):
 21        return data_dir
 22
 23    zip_path = os.path.join(path, "Dataset_BUSI.zip")
 24    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM, verify=False)
 25    util.unzip(zip_path=zip_path, dst=path)
 26
 27    return data_dir
 28
 29
 30def _get_busi_paths(path, category, download):
 31    data_dir = get_busi_data(path=path, download=download)
 32
 33    if category is None:
 34        category = "*"
 35
 36    data_dir = os.path.join(data_dir, category)
 37
 38    image_paths = sorted(glob(os.path.join(data_dir, r"*).png")))
 39    gt_paths = sorted(glob(os.path.join(data_dir, r"*)_mask.png")))
 40
 41    return image_paths, gt_paths
 42
 43
 44def get_busi_dataset(
 45    path: Union[os.PathLike, str],
 46    patch_shape: Tuple[int, int],
 47    category: Optional[str] = None,
 48    resize_inputs: bool = False,
 49    download: bool = False,
 50    **kwargs
 51):
 52    """"Dataset for segmentation of breast cancer in ultrasound images.
 53
 54    This database is located at https://scholar.cu.edu.eg/?q=afahmy/pages/dataset
 55
 56    The dataset is from Al-Dhabyani et al. - https://doi.org/10.1016/j.dib.2019.104863
 57    Please cite it if you use this dataset for a publication.
 58    """
 59    if category is not None:
 60        assert category in ["normal", "benign", "malignant"]
 61
 62    image_paths, gt_paths = _get_busi_paths(path=path, category=category, download=download)
 63
 64    if resize_inputs:
 65        raw_trafo = ResizeInputs(target_shape=patch_shape, is_rgb=True)
 66        label_trafo = ResizeInputs(target_shape=patch_shape, is_label=True)
 67        patch_shape = None
 68    else:
 69        patch_shape = patch_shape
 70        raw_trafo, label_trafo = None, None
 71
 72    dataset = ImageCollectionDataset(
 73        raw_image_paths=image_paths,
 74        label_image_paths=gt_paths,
 75        patch_shape=patch_shape,
 76        raw_transform=raw_trafo,
 77        label_transform=label_trafo,
 78        **kwargs
 79    )
 80
 81    return dataset
 82
 83
 84def get_busi_loader(
 85    path: Union[os.PathLike, str],
 86    patch_shape: Tuple[int, int],
 87    batch_size: int,
 88    category: Optional[str] = None,
 89    resize_inputs: bool = False,
 90    download: bool = False,
 91    **kwargs
 92):
 93    """Dataloader for segmentation of breast cancer in ultrasound images. See `get_busi_dataset` for details.
 94    """
 95    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
 96    dataset = get_busi_dataset(
 97        path=path,
 98        patch_shape=patch_shape,
 99        category=category,
100        resize_inputs=resize_inputs,
101        download=download,
102        **ds_kwargs
103    )
104    loader = torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
105    return loader
URL = 'https://scholar.cu.edu.eg/Dataset_BUSI.zip'
CHECKSUM = 'b2ce09f6063a31a73f628b6a6ee1245187cbaec225e93e563735691d68654de7'
def get_busi_data(path, download):
def get_busi_data(path, download):
    """Make sure the BUSI data is present below `path` and return its folder.

    When the extracted "Dataset_BUSI_with_GT" folder already exists nothing is
    fetched. Otherwise the zip archive is obtained via `util.download_source`
    and extracted into `path` via `util.unzip`.

    Args:
        path: Root folder for the dataset; it is created if it does not exist.
        download: Forwarded to `util.download_source`.

    Returns:
        The path of the "Dataset_BUSI_with_GT" folder inside `path`.
    """
    os.makedirs(path, exist_ok=True)
    extracted_dir = os.path.join(path, "Dataset_BUSI_with_GT")

    # Only fetch and extract the archive when the extracted folder is missing.
    if not os.path.exists(extracted_dir):
        archive = os.path.join(path, "Dataset_BUSI.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM, verify=False)
        util.unzip(zip_path=archive, dst=path)

    return extracted_dir
def get_busi_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], category: Optional[str] = None, resize_inputs: bool = False, download: bool = False, **kwargs):
def get_busi_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    category: Optional[str] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
):
    """Dataset for segmentation of breast cancer in ultrasound images.

    This database is located at https://scholar.cu.edu.eg/?q=afahmy/pages/dataset

    The dataset is from Al-Dhabyani et al. - https://doi.org/10.1016/j.dib.2019.104863
    Please cite it if you use this dataset for a publication.

    Args:
        path: Root folder of the dataset, passed on to `_get_busi_paths`.
        patch_shape: Patch shape handed to the dataset. With `resize_inputs=True`
            it is used as the target shape of the resizing instead, and the
            dataset receives `patch_shape=None`.
        category: One of "normal", "benign" or "malignant"; `None` uses all of them.
        resize_inputs: Whether to resize images and labels to `patch_shape`
            with `ResizeInputs` transforms.
        download: Passed on to `_get_busi_paths`.
        kwargs: Additional keyword arguments for `ImageCollectionDataset`.

    Returns:
        The `ImageCollectionDataset` built from the image and mask paths.

    Raises:
        AssertionError: If `category` is neither `None` nor a known category.
    """
    valid_categories = ("normal", "benign", "malignant")
    # Kept as an assert so callers still see AssertionError for a bad category.
    assert category is None or category in valid_categories, (
        f"Invalid category {category!r}; expected one of {valid_categories} or None."
    )

    image_paths, gt_paths = _get_busi_paths(path=path, category=category, download=download)

    raw_trafo, label_trafo = None, None
    if resize_inputs:
        raw_trafo = ResizeInputs(target_shape=patch_shape, is_rgb=True)
        label_trafo = ResizeInputs(target_shape=patch_shape, is_label=True)
        # The transforms now carry the target shape, so no patch shape is given to the dataset.
        patch_shape = None

    return ImageCollectionDataset(
        raw_image_paths=image_paths,
        label_image_paths=gt_paths,
        patch_shape=patch_shape,
        raw_transform=raw_trafo,
        label_transform=label_trafo,
        **kwargs
    )

Dataset for segmentation of breast cancer in ultrasound images.

This database is located at https://scholar.cu.edu.eg/?q=afahmy/pages/dataset

The dataset is from Al-Dhabyani et al. - https://doi.org/10.1016/j.dib.2019.104863 Please cite it if you use this dataset for a publication.

def get_busi_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, category: Optional[str] = None, resize_inputs: bool = False, download: bool = False, **kwargs):
 85def get_busi_loader(
 86    path: Union[os.PathLike, str],
 87    patch_shape: Tuple[int, int],
 88    batch_size: int,
 89    category: Optional[str] = None,
 90    resize_inputs: bool = False,
 91    download: bool = False,
 92    **kwargs
 93):
 94    """Dataloader for segmentation of breast cancer in ultrasound images. See `get_busi_dataset` for details.
 95    """
 96    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
 97    dataset = get_busi_dataset(
 98        path=path,
 99        patch_shape=patch_shape,
100        category=category,
101        resize_inputs=resize_inputs,
102        download=download,
103        **ds_kwargs
104    )
105    loader = torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
106    return loader

Dataloader for segmentation of breast cancer in ultrasound images. See get_busi_dataset for details.