torch_em.data.datasets.medical.abus
The ABUS dataset contains annotations for breast cancer segmentation in ultrasound images.
This dataset is located at https://www.kaggle.com/datasets/mohammedtgadallah/mt-small-dataset. The dataset is from the publication https://doi.org/10.1371/journal.pone.0251899. Please cite it if you use this dataset for your research.
"""The ABUS dataset contains annotations for breast cancer segmentation in ultrasound images.

This dataset is located at https://www.kaggle.com/datasets/mohammedtgadallah/mt-small-dataset.
The dataset is from the publication https://doi.org/10.1371/journal.pone.0251899.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Tuple, List, Union, Literal

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


def get_abus_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the ABUS dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the "MT_Small_Dataset" folder inside `path`.
    """
    data_dir = os.path.join(path, "MT_Small_Dataset")

    # Only fetch and extract the archive when the extracted folder is missing.
    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)
        util.download_source_kaggle(path=path, dataset_name="mohammedtgadallah/mt-small-dataset", download=download)
        util.unzip(zip_path=os.path.join(path, "mt-small-dataset.zip"), dst=path)

    return data_dir


def get_abus_paths(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"],
    category: Literal["benign", "malign"],
    image_choice: Literal["raw", "fuzzy"] = "raw",
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the ABUS data.

    The images of the chosen category are sorted naturally by filename and divided by position:
    the first 125 files form the "train" split, the next 25 the "val" split and all remaining
    files the "test" split.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        category: The choice of tumor category.
        image_choice: The choice of input data.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `image_choice`, `split` or `category` is not one of the supported values.
        AssertionError: If no images are found or the number of images and labels differs.
    """
    # Validate the choices before touching the data, so that a typo fails right away
    # instead of only after the data has been downloaded.
    if image_choice not in ("raw", "fuzzy"):
        raise ValueError(f"Invalid input choice provided: '{image_choice}'. Choose from 'raw' or 'fuzzy'.")

    if split not in ("train", "val", "test"):
        raise ValueError(f"Invalid split choice provided: '{split}'. Choose from 'train', 'val' or 'test'.")

    if category not in ("benign", "malign"):
        raise ValueError(f"Invalid tumor category provided: '{category}'. Choose from 'benign' or 'malign'.")

    data_dir = get_abus_data(path, download)

    cname = "Benign" if category == "benign" else "Malignant"
    raw_iname = f"Original_{cname}" if image_choice == "raw" else f"Fuzzy_{cname}"
    gt_iname = f"Ground_Truth_{cname}"

    image_paths = natsorted(glob(os.path.join(data_dir, cname, raw_iname, "*.png")))
    gt_paths = natsorted(glob(os.path.join(data_dir, cname, gt_iname, "*.png")))

    # Raised explicitly (instead of via `assert`) so that the check is not stripped under `python -O`,
    # while the exception type stays the same.
    if not image_paths or len(image_paths) != len(gt_paths):
        raise AssertionError(
            f"Found {len(image_paths)} images and {len(gt_paths)} labels in '{os.path.join(data_dir, cname)}'."
        )

    # Positional split of the naturally sorted files.
    split_slices = {"train": slice(None, 125), "val": slice(125, 150), "test": slice(150, None)}
    selection = split_slices[split]

    return image_paths[selection], gt_paths[selection]


def get_abus_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    category: Literal["benign", "malign"],
    split: Literal["train", "val", "test"],
    image_choice: Literal["raw", "fuzzy"] = "raw",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the ABUS dataset for breast cancer segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        category: The choice of tumor category.
        split: The choice of data split.
        image_choice: The choice of input data.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_abus_paths(path, split, category, image_choice, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        ndim=2,
        with_channels=True,
        is_seg_dataset=False,
        **kwargs
    )


def get_abus_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    category: Literal["benign", "malign"],
    split: Literal["train", "val", "test"],
    image_choice: Literal["raw", "fuzzy"] = "raw",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the ABUS dataloader for breast cancer segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        category: The choice of tumor category.
        split: The choice of data split.
        image_choice: The choice of input data.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_abus_dataset(path, patch_shape, category, split, image_choice, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def get_abus_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_abus_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the ABUS dataset is available locally and return its location.

    Nothing is downloaded or extracted when the "MT_Small_Dataset" folder already exists in `path`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the "MT_Small_Dataset" folder inside `path`.
    """
    dataset_root = os.path.join(path, "MT_Small_Dataset")

    # Only fetch and extract the archive when the extracted folder is missing.
    if not os.path.exists(dataset_root):
        os.makedirs(path, exist_ok=True)
        util.download_source_kaggle(path=path, dataset_name="mohammedtgadallah/mt-small-dataset", download=download)
        archive_path = os.path.join(path, "mt-small-dataset.zip")
        util.unzip(zip_path=archive_path, dst=path)

    return dataset_root
Download the ABUS dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def get_abus_paths(path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], category: Literal['benign', 'malign'], image_choice: Literal['raw', 'fuzzy'] = 'raw', download: bool = False) -> Tuple[List[str], List[str]]:
def get_abus_paths(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"],
    category: Literal["benign", "malign"],
    image_choice: Literal["raw", "fuzzy"] = "raw",
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the ABUS data.

    The images of the chosen category are sorted naturally by filename and divided by position:
    the first 125 files form the "train" split, the next 25 the "val" split and all remaining
    files the "test" split.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        category: The choice of tumor category.
        image_choice: The choice of input data.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `image_choice`, `split` or `category` is not one of the supported values.
        AssertionError: If no images are found or the number of images and labels differs.
    """
    # Validate the choices before touching the data, so that a typo fails right away
    # instead of only after the data has been downloaded.
    if image_choice not in ("raw", "fuzzy"):
        raise ValueError(f"Invalid input choice provided: '{image_choice}'. Choose from 'raw' or 'fuzzy'.")

    if split not in ("train", "val", "test"):
        raise ValueError(f"Invalid split choice provided: '{split}'. Choose from 'train', 'val' or 'test'.")

    if category not in ("benign", "malign"):
        raise ValueError(f"Invalid tumor category provided: '{category}'. Choose from 'benign' or 'malign'.")

    data_dir = get_abus_data(path, download)

    cname = "Benign" if category == "benign" else "Malignant"
    raw_iname = f"Original_{cname}" if image_choice == "raw" else f"Fuzzy_{cname}"
    gt_iname = f"Ground_Truth_{cname}"

    image_paths = natsorted(glob(os.path.join(data_dir, cname, raw_iname, "*.png")))
    gt_paths = natsorted(glob(os.path.join(data_dir, cname, gt_iname, "*.png")))

    # Raised explicitly (instead of via `assert`) so that the check is not stripped under `python -O`,
    # while the exception type stays the same.
    if not image_paths or len(image_paths) != len(gt_paths):
        raise AssertionError(
            f"Found {len(image_paths)} images and {len(gt_paths)} labels in '{os.path.join(data_dir, cname)}'."
        )

    # Positional split of the naturally sorted files.
    split_slices = {"train": slice(None, 125), "val": slice(125, 150), "test": slice(150, None)}
    selection = split_slices[split]

    return image_paths[selection], gt_paths[selection]
Get paths to the ABUS data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The choice of data split.
- category: The choice of tumor category.
- image_choice: The choice of input data.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_abus_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, int], category: Literal['benign', 'malign'], split: Literal['train', 'val', 'test'], image_choice: Literal['raw', 'fuzzy'] = 'raw', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_abus_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    category: Literal["benign", "malign"],
    split: Literal["train", "val", "test"],
    image_choice: Literal["raw", "fuzzy"] = "raw",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the ABUS segmentation dataset for breast cancer segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        category: The choice of tumor category.
        split: The choice of data split.
        image_choice: The choice of input data.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_files, label_files = get_abus_paths(
        path=path, split=split, category=category, image_choice=image_choice, download=download
    )

    if resize_inputs:
        # The helper returns the updated dataset kwargs together with the patch shape to pass on.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_files,
        raw_key=None,
        label_paths=label_files,
        label_key=None,
        patch_shape=patch_shape,
        ndim=2,
        with_channels=True,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset
Get the ABUS dataset for breast cancer segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- category: The choice of tumor category.
- split: The choice of data split.
- image_choice: The choice of input data.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_abus_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], category: Literal['benign', 'malign'], split: Literal['train', 'val', 'test'], image_choice: Literal['raw', 'fuzzy'] = 'raw', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_abus_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    category: Literal["benign", "malign"],
    split: Literal["train", "val", "test"],
    image_choice: Literal["raw", "fuzzy"] = "raw",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the ABUS dataloader for breast cancer segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        category: The choice of tumor category.
        split: The choice of data split.
        image_choice: The choice of input data.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments understood by the dataset factory from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_abus_dataset(
        path=path,
        patch_shape=patch_shape,
        category=category,
        split=split,
        image_choice=image_choice,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)
Get the ABUS dataloader for breast cancer segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- category: The choice of tumor category.
- split: The choice of data split.
- image_choice: The choice of input data.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.