torch_em.data.datasets.medical.micro_usp
The MicroUSP dataset contains annotations for prostate segmentation in micro-ultrasound scans.
The dataset is from the publication https://doi.org/10.1016/j.compmedimag.2024.102326. Please cite it if you use this dataset for your research.
"""The MicroUSP dataset contains annotations for prostate segmentation
in micro-ultrasound scans.

The dataset is from the publication https://doi.org/10.1016/j.compmedimag.2024.102326.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from pathlib import Path
from natsort import natsorted
from typing import Union, Tuple, List, Literal

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://zenodo.org/records/10475293/files/Micro_Ultrasound_Prostate_Segmentation_Dataset.zip"
CHECKSUM = "031645dc30948314e379d0a0a7d54bad1cd4e1f3f918b77455d69810aa05dce3"


def get_micro_usp_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the MicroUSP dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    fname = Path(URL).stem
    data_dir = os.path.join(path, fname)
    # Nothing to do when the extracted folder is already in place.
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, f"{fname}.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def get_micro_usp_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the MicroUSP data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split. One of 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of the supported choices.
    """
    # Reject an unknown split up front, before any download or filesystem access.
    if split not in ("train", "val", "test"):
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_micro_usp_data(path=path, download=download)

    # 'train' and 'val' are both read from the 'train' folder and separated by index below.
    folder = "test" if split == "test" else "train"
    image_paths = natsorted(glob(os.path.join(data_dir, folder, "micro_ultrasound_scans", "*.nii.gz")))
    gt_paths = natsorted(glob(os.path.join(data_dir, folder, "expert_annotations", "*.nii.gz")))

    # The first 50 (naturally sorted) volumes are used for training, the remainder for validation.
    n_train = 50
    if split == "train":
        image_paths, gt_paths = image_paths[:n_train], gt_paths[:n_train]
    elif split == "val":
        image_paths, gt_paths = image_paths[n_train:], gt_paths[n_train:]

    return image_paths, gt_paths


def get_micro_usp_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the MicroUSP dataset for segmentation of prostate in micro-ultrasound scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_micro_usp_paths(path, split, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths, raw_key="data", label_paths=gt_paths, label_key="data", patch_shape=patch_shape, **kwargs
    )


def get_micro_usp_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the MicroUSP dataloader for segmentation of prostate in micro-ultrasound scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Route each extra keyword argument either to the dataset factory or to the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_micro_usp_dataset(path, patch_shape, split, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
# Download location of the dataset archive and the checksum passed along when fetching it.
URL = "https://zenodo.org/records/10475293/files/Micro_Ultrasound_Prostate_Segmentation_Dataset.zip"
CHECKSUM = "031645dc30948314e379d0a0a7d54bad1cd4e1f3f918b77455d69810aa05dce3"
def get_micro_usp_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_micro_usp_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Fetch and unpack the MicroUSP archive unless the extracted folder already exists.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the folder the archive is extracted to.
    """
    archive_stem = Path(URL).stem
    extracted_dir = os.path.join(path, archive_stem)

    # Only fetch and unpack when the extracted folder is missing.
    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)

        archive_path = os.path.join(path, f"{archive_stem}.zip")
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive_path, dst=path)

    return extracted_dir
Download the MicroUSP dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def get_micro_usp_paths(path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]:
def get_micro_usp_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the MicroUSP data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split. One of 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of the supported choices.
    """
    # Reject an unknown split up front, before any download or filesystem access.
    if split not in ("train", "val", "test"):
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_micro_usp_data(path=path, download=download)

    # 'train' and 'val' are both read from the 'train' folder and separated by index below.
    folder = "test" if split == "test" else "train"
    image_paths = natsorted(glob(os.path.join(data_dir, folder, "micro_ultrasound_scans", "*.nii.gz")))
    gt_paths = natsorted(glob(os.path.join(data_dir, folder, "expert_annotations", "*.nii.gz")))

    # The first 50 (naturally sorted) volumes are used for training, the remainder for validation.
    n_train = 50
    if split == "train":
        image_paths, gt_paths = image_paths[:n_train], gt_paths[:n_train]
    elif split == "val":
        image_paths, gt_paths = image_paths[n_train:], gt_paths[n_train:]

    return image_paths, gt_paths
Get paths to the MicroUSP data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The choice of data split.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_micro_usp_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_micro_usp_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the MicroUSP segmentation dataset for prostate segmentation in micro-ultrasound scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_micro_usp_paths(path=path, split=split, download=download)

    if resize_inputs:
        # The helper returns the updated dataset kwargs together with the patch shape to use.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key="data",
        label_paths=label_paths,
        label_key="data",
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
Get the MicroUSP dataset for segmentation of prostate in micro-ultrasound scans.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- resize_inputs: Whether to resize the inputs to the patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_micro_usp_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_micro_usp_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the MicroUSP dataloader for prostate segmentation in micro-ultrasound scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Route each extra keyword argument either to the dataset factory or to the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    micro_usp_dataset = get_micro_usp_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(micro_usp_dataset, batch_size, **dataloader_kwargs)
Get the MicroUSP dataloader for segmentation of prostate in micro-ultrasound scans.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- resize_inputs: Whether to resize the inputs to the patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.