torch_em.data.datasets.medical.abus

The ABUS dataset contains annotations for breast cancer segmentation in ultrasound images.

This dataset is located at https://www.kaggle.com/datasets/mohammedtgadallah/mt-small-dataset. The dataset is from the publication https://doi.org/10.1371/journal.pone.0251899. Please cite it if you use this dataset for your research.

  1"""The ABUS dataset contains annotations for breast cancer segmentation in ultrasound images.
  2
  3This dataset is located at https://www.kaggle.com/datasets/mohammedtgadallah/mt-small-dataset.
  4The dataset is from the publication https://doi.org/10.1371/journal.pone.0251899.
  5Please cite it if you use this dataset for your research.
  6"""
  7
  8import os
  9from glob import glob
 10from natsort import natsorted
 11from typing import Tuple, List, Union, Literal
 12
 13from torch.utils.data import Dataset, DataLoader
 14
 15import torch_em
 16
 17from .. import util
 18
 19
 20def get_abus_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 21    """Download the ABUS dataset.
 22
 23    Args:
 24        path: Filepath to a folder where the data is downloaded for further processing.
 25        download: Whether to download the data if it is not present.
 26
 27    Returns:
 28        Filepath where the data is downloaded.
 29    """
 30    data_dir = os.path.join(path, "MT_Small_Dataset")
 31    if os.path.exists(data_dir):
 32        return data_dir
 33
 34    os.makedirs(path, exist_ok=True)
 35
 36    util.download_source_kaggle(path=path, dataset_name="mohammedtgadallah/mt-small-dataset", download=download)
 37    util.unzip(zip_path=os.path.join(path, "mt-small-dataset.zip"), dst=path)
 38
 39    return data_dir
 40
 41
 42def get_abus_paths(
 43    path: Union[os.PathLike, str],
 44    split: Literal["train", "val", "test"],
 45    category: Literal["benign", "malign"],
 46    image_choice: Literal["raw", "fuzzy"] = "raw",
 47    download: bool = False,
 48) -> Tuple[List[str], List[str]]:
 49    """Get paths to the ABUS data.
 50
 51    Args:
 52        path: Filepath to a folder where the data is downloaded for further processing.
 53        split: The choice of data split.
 54        category: The choice of tumor category.
 55        image_choice: The choice of input data.
 56        download: Whether to download the data if it is not present.
 57
 58    Returns:
 59        List of filepaths for the image data.
 60        List of filepaths for the label data.
 61    """
 62    data_dir = get_abus_data(path, download)
 63
 64    if image_choice not in ["raw", "fuzzy"]:
 65        raise ValueError("Invalid input choice provided.", image_choice)
 66
 67    if split not in ["train", "val", "test"]:
 68        raise ValueError("Invalid split choice provided.")
 69
 70    if category not in ["benign", "malign"]:
 71        raise ValueError("Invalid tumor category provided.")
 72
 73    cname = "Benign" if category == "benign" else "Malignant"
 74    raw_iname = f"Original_{cname}" if image_choice == "raw" else f"Fuzzy_{cname}"
 75    gt_iname = f"Ground_Truth_{cname}"
 76
 77    image_paths = natsorted(glob(os.path.join(data_dir, cname, raw_iname, "*.png")))
 78    gt_paths = natsorted(glob(os.path.join(data_dir, cname, gt_iname, "*.png")))
 79
 80    assert len(image_paths) and len(image_paths) == len(gt_paths)
 81
 82    if split == "train":
 83        image_paths, gt_paths = image_paths[:125], gt_paths[:125]
 84    elif split == "val":
 85        image_paths, gt_paths = image_paths[125:150], gt_paths[125:150]
 86    else:
 87        image_paths, gt_paths = image_paths[150:], gt_paths[150:]
 88
 89    return image_paths, gt_paths
 90
 91
 92def get_abus_dataset(
 93    path: Union[os.PathLike, str],
 94    patch_shape: Tuple[int, int],
 95    category: Literal["benign", "malign"],
 96    split: Literal["train", "val", "test"],
 97    image_choice: Literal["raw", "fuzzy"] = "raw",
 98    resize_inputs: bool = False,
 99    download: bool = False,
100    **kwargs
101) -> Dataset:
102    """Get the ABUS dataset for breast cancer segmentation.
103
104    Args:
105        path: Filepath to a folder where the data is downloaded for further processing.
106        patch_shape: The patch shape to use for training.
107        category: The choice of tumor category.
108        split: The choice of data split.
109        image_choice: The choice of input data.
110        resize_inputs: Whether to resize the inputs.
111        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
112
113    Returns:
114        The segmentation dataset.
115    """
116    image_paths, gt_paths = get_abus_paths(path, split, category, image_choice, download)
117
118    if resize_inputs:
119        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
120        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
121            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
122        )
123
124    return torch_em.default_segmentation_dataset(
125        raw_paths=image_paths,
126        raw_key=None,
127        label_paths=gt_paths,
128        label_key=None,
129        patch_shape=patch_shape,
130        ndim=2,
131        with_channels=True,
132        is_seg_dataset=False,
133        **kwargs
134    )
135
136
def get_abus_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    category: Literal["benign", "malign"],
    split: Literal["train", "val", "test"],
    image_choice: Literal["raw", "fuzzy"] = "raw",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the ABUS dataloader for breast cancer segmentation.

    The extra keyword arguments are divided with `util.split_kwargs` into those for
    `torch_em.default_segmentation_dataset` and the remaining ones, which are passed
    to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        category: The choice of tumor category.
        split: The choice of data split.
        image_choice: The choice of input data.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    abus_dataset = get_abus_dataset(
        path=path,
        patch_shape=patch_shape,
        category=category,
        split=split,
        image_choice=image_choice,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(abus_dataset, batch_size, **dataloader_kwargs)
def get_abus_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_abus_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the ABUS data is available locally and return its location.

    If the folder `MT_Small_Dataset` already exists inside `path`, it is returned right away.
    Otherwise the Kaggle dataset "mohammedtgadallah/mt-small-dataset" is requested via
    `util.download_source_kaggle` and the archive `mt-small-dataset.zip` is unpacked into `path`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    dataset_root = os.path.join(path, "MT_Small_Dataset")

    if not os.path.exists(dataset_root):
        # Create the target folder before fetching and unpacking the archive into it.
        os.makedirs(path, exist_ok=True)

        util.download_source_kaggle(path=path, dataset_name="mohammedtgadallah/mt-small-dataset", download=download)

        archive_path = os.path.join(path, "mt-small-dataset.zip")
        util.unzip(zip_path=archive_path, dst=path)

    return dataset_root

Download the ABUS dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def get_abus_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], category: Literal['benign', 'malign'], image_choice: Literal['raw', 'fuzzy'] = 'raw', download: bool = False) -> Tuple[List[str], List[str]]:
def get_abus_paths(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"],
    category: Literal["benign", "malign"],
    image_choice: Literal["raw", "fuzzy"] = "raw",
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the ABUS data.

    The images and labels of the chosen category are sorted naturally and then split by position:
    the first 125 samples are used for 'train', the next 25 for 'val' and the remaining ones for 'test'.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        category: The choice of tumor category.
        image_choice: The choice of input data.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `image_choice`, `split` or `category` is not one of the supported values.
        AssertionError: If no images are found or the number of images and labels differs.
    """
    # Validate all choices first, so that an invalid argument fails fast
    # instead of only after the data has been downloaded.
    if image_choice not in ("raw", "fuzzy"):
        raise ValueError(f"Invalid input choice provided: {image_choice!r}. Choose from 'raw' or 'fuzzy'.")

    if split not in ("train", "val", "test"):
        raise ValueError(f"Invalid split choice provided: {split!r}. Choose from 'train', 'val' or 'test'.")

    if category not in ("benign", "malign"):
        raise ValueError(f"Invalid tumor category provided: {category!r}. Choose from 'benign' or 'malign'.")

    data_dir = get_abus_data(path, download)

    cname = "Benign" if category == "benign" else "Malignant"
    raw_iname = f"Original_{cname}" if image_choice == "raw" else f"Fuzzy_{cname}"
    gt_iname = f"Ground_Truth_{cname}"

    image_paths = natsorted(glob(os.path.join(data_dir, cname, raw_iname, "*.png")))
    gt_paths = natsorted(glob(os.path.join(data_dir, cname, gt_iname, "*.png")))

    # Raised explicitly (instead of a bare `assert`) so the check also runs under `python -O`.
    if not image_paths or len(image_paths) != len(gt_paths):
        raise AssertionError(
            f"Expected a non-empty, matching set of images and labels in '{data_dir}', "
            f"but found {len(image_paths)} image(s) and {len(gt_paths)} label(s)."
        )

    # Position-based split of the naturally sorted file lists.
    split_slices = {"train": slice(None, 125), "val": slice(125, 150), "test": slice(150, None)}
    selection = split_slices[split]

    return image_paths[selection], gt_paths[selection]

Get paths to the ABUS data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • split: The choice of data split.
  • category: The choice of tumor category.
  • image_choice: The choice of input data.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_abus_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], category: Literal['benign', 'malign'], split: Literal['train', 'val', 'test'], image_choice: Literal['raw', 'fuzzy'] = 'raw', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 93def get_abus_dataset(
 94    path: Union[os.PathLike, str],
 95    patch_shape: Tuple[int, int],
 96    category: Literal["benign", "malign"],
 97    split: Literal["train", "val", "test"],
 98    image_choice: Literal["raw", "fuzzy"] = "raw",
 99    resize_inputs: bool = False,
100    download: bool = False,
101    **kwargs
102) -> Dataset:
103    """Get the ABUS dataset for breast cancer segmentation.
104
105    Args:
106        path: Filepath to a folder where the data is downloaded for further processing.
107        patch_shape: The patch shape to use for training.
108        category: The choice of tumor category.
109        split: The choice of data split.
110        image_choice: The choice of input data.
111        resize_inputs: Whether to resize the inputs.
112        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
113
114    Returns:
115        The segmentation dataset.
116    """
117    image_paths, gt_paths = get_abus_paths(path, split, category, image_choice, download)
118
119    if resize_inputs:
120        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
121        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
122            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
123        )
124
125    return torch_em.default_segmentation_dataset(
126        raw_paths=image_paths,
127        raw_key=None,
128        label_paths=gt_paths,
129        label_key=None,
130        patch_shape=patch_shape,
131        ndim=2,
132        with_channels=True,
133        is_seg_dataset=False,
134        **kwargs
135    )

Get the ABUS dataset for breast cancer segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • category: The choice of tumor category.
  • split: The choice of data split.
  • image_choice: The choice of input data.
  • resize_inputs: Whether to resize the inputs.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_abus_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], category: Literal['benign', 'malign'], split: Literal['train', 'val', 'test'], image_choice: Literal['raw', 'fuzzy'] = 'raw', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_abus_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    category: Literal["benign", "malign"],
    split: Literal["train", "val", "test"],
    image_choice: Literal["raw", "fuzzy"] = "raw",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the ABUS dataloader for breast cancer segmentation.

    The extra keyword arguments are divided with `util.split_kwargs` into those for
    `torch_em.default_segmentation_dataset` and the remaining ones, which are passed
    to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        category: The choice of tumor category.
        split: The choice of data split.
        image_choice: The choice of input data.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    abus_dataset = get_abus_dataset(
        path=path,
        patch_shape=patch_shape,
        category=category,
        split=split,
        image_choice=image_choice,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(abus_dataset, batch_size, **dataloader_kwargs)

Get the ABUS dataloader for breast cancer segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • category: The choice of tumor category.
  • split: The choice of data split.
  • image_choice: The choice of input data.
  • resize_inputs: Whether to resize the inputs.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.