torch_em.data.datasets.medical.micro_usp

The MicroUSP dataset contains annotations for prostate segmentation in micro-ultrasound scans.

The dataset is from the publication https://doi.org/10.1016/j.compmedimag.2024.102326. Please cite it if you use this dataset for your research.

View Source

  1"""The MicroUSP dataset contains annotations for prostate segmentation
  2in micro-ultrasound scans.
  3
  4The dataset is from the publication https://doi.org/10.1016/j.compmedimag.2024.102326.
  5Please cite it if you use this dataset for your research.
  6"""
  7
  8import os
  9from glob import glob
 10from pathlib import Path
 11from natsort import natsorted
 12from typing import Union, Tuple, List, Literal
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
 21URL = "https://zenodo.org/records/10475293/files/Micro_Ultrasound_Prostate_Segmentation_Dataset.zip"
 22CHECKSUM = "031645dc30948314e379d0a0a7d54bad1cd4e1f3f918b77455d69810aa05dce3"
 23
 24
 25def get_micro_usp_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 26    """Download the MicroUSP dataset.
 27
 28    Args:
 29        path: Filepath to a folder where the data is downloaded for further processing.
 30        download: Whether to download the data if it is not present.
 31
 32    Returns:
 33        Filepath where the data is downloaded.
 34    """
 35    fname = Path(URL).stem
 36    data_dir = os.path.join(path, fname)
 37    if os.path.exists(data_dir):
 38        return data_dir
 39
 40    os.makedirs(path, exist_ok=True)
 41
 42    zip_path = os.path.join(path, f"{fname}.zip")
 43    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 44    util.unzip(zip_path=zip_path, dst=path)
 45
 46    return data_dir
 47
 48
 49def get_micro_usp_paths(
 50    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
 51) -> Tuple[List[str], List[str]]:
 52    """Get paths to the MicroUSP data.
 53
 54    Args:
 55        path: Filepath to a folder where the data is downloaded for further processing.
 56        split: The choice of data split.
 57        download: Whether to download the data if it is not present.
 58
 59    Returns:
 60        List of filepaths for the image data.
 61        List of filepaths for the label data.
 62    """
 63    data_dir = get_micro_usp_data(path=path, download=download)
 64
 65    if split == "test":
 66        image_paths = natsorted(glob(os.path.join(data_dir, split, "micro_ultrasound_scans", "*.nii.gz")))
 67        gt_paths = natsorted(glob(os.path.join(data_dir, split, "expert_annotations", "*.nii.gz")))
 68    else:
 69        image_paths = natsorted(glob(os.path.join(data_dir, "train", "micro_ultrasound_scans", "*.nii.gz")))
 70        gt_paths = natsorted(glob(os.path.join(data_dir, "train", "expert_annotations", "*.nii.gz")))
 71
 72        if split == "train":
 73            image_paths, gt_paths = image_paths[:50], gt_paths[:50]
 74        elif split == "val":
 75            image_paths, gt_paths = image_paths[50:], gt_paths[50:]
 76        else:
 77            raise ValueError(f"'{split}' is not a valid split.")
 78
 79    return image_paths, gt_paths
 80
 81
 82def get_micro_usp_dataset(
 83    path: Union[os.PathLike, str],
 84    patch_shape: Tuple[int, ...],
 85    split: Literal['train', 'val', 'test'],
 86    resize_inputs: bool = False,
 87    download: bool = False,
 88    **kwargs
 89) -> Dataset:
 90    """Get the MicroUSP dataset for segmentation of prostate in micro-ultrasound scans.
 91
 92    Args:
 93        path: Filepath to a folder where the data is downloaded for further processing.
 94        patch_shape: The patch shape to use for training.
 95        split: The choice of data split.
 96        resize_inputs: Whether to resize the inputs to the patch shape.
 97        download: Whether to download the data if it is not present.
 98        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 99
100    Returns:
101        The segmentation dataset.
102    """
103    image_paths, gt_paths = get_micro_usp_paths(path, split, download)
104
105    if resize_inputs:
106        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
107        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
108            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
109        )
110
111    return torch_em.default_segmentation_dataset(
112        raw_paths=image_paths, raw_key="data", label_paths=gt_paths, label_key="data", patch_shape=patch_shape, **kwargs
113    )
114
115
def get_micro_usp_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the MicroUSP dataloader for prostate segmentation in micro-ultrasound scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments into those for the dataset and those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_micro_usp_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)

URL = 'https://zenodo.org/records/10475293/files/Micro_Ultrasound_Prostate_Segmentation_Dataset.zip'

CHECKSUM = '031645dc30948314e379d0a0a7d54bad1cd4e1f3f918b77455d69810aa05dce3'

def get_micro_usp_data(path: Union[os.PathLike, str], download: bool = False) -> str: View Source

def get_micro_usp_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the MicroUSP dataset is available locally and return its location.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath to the folder that holds the extracted data.
    """
    archive_stem = Path(URL).stem
    extracted_dir = os.path.join(path, archive_stem)

    if not os.path.exists(extracted_dir):
        # The extracted folder is missing: get the zip archive into `path` and unpack it there.
        os.makedirs(path, exist_ok=True)
        archive_path = os.path.join(path, f"{archive_stem}.zip")
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive_path, dst=path)

    return extracted_dir

Download the MicroUSP dataset.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
download: Whether to download the data if it is not present.

Returns:

Filepath where the data is downloaded.

def get_micro_usp_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]: View Source

def get_micro_usp_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the MicroUSP data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split. One of 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of 'train', 'val' or 'test'.
    """
    # Reject an unknown split up front, before any download or file system access is triggered.
    if split not in ("train", "val", "test"):
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_micro_usp_data(path=path, download=download)

    # Only the 'train' and 'test' folders are searched; 'val' is carved out of the 'train' folder.
    folder = "test" if split == "test" else "train"
    image_paths = natsorted(glob(os.path.join(data_dir, folder, "micro_ultrasound_scans", "*.nii.gz")))
    gt_paths = natsorted(glob(os.path.join(data_dir, folder, "expert_annotations", "*.nii.gz")))

    # The naturally sorted 'train' folder is divided: first 50 volumes for training, the rest for validation.
    if split == "train":
        image_paths, gt_paths = image_paths[:50], gt_paths[:50]
    elif split == "val":
        image_paths, gt_paths = image_paths[50:], gt_paths[50:]

    return image_paths, gt_paths

Get paths to the MicroUSP data.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
split: The choice of data split.
download: Whether to download the data if it is not present.

Returns:

Two lists, in this order: the list of filepaths for the image data, and the list of filepaths for the label data.

def get_micro_usp_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

 83def get_micro_usp_dataset(
 84    path: Union[os.PathLike, str],
 85    patch_shape: Tuple[int, ...],
 86    split: Literal['train', 'val', 'test'],
 87    resize_inputs: bool = False,
 88    download: bool = False,
 89    **kwargs
 90) -> Dataset:
 91    """Get the MicroUSP dataset for segmentation of prostate in micro-ultrasound scans.
 92
 93    Args:
 94        path: Filepath to a folder where the data is downloaded for further processing.
 95        patch_shape: The patch shape to use for training.
 96        split: The choice of data split.
 97        resize_inputs: Whether to resize the inputs to the patch shape.
 98        download: Whether to download the data if it is not present.
 99        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
100
101    Returns:
102        The segmentation dataset.
103    """
104    image_paths, gt_paths = get_micro_usp_paths(path, split, download)
105
106    if resize_inputs:
107        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
108        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
109            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
110        )
111
112    return torch_em.default_segmentation_dataset(
113        raw_paths=image_paths, raw_key="data", label_paths=gt_paths, label_key="data", patch_shape=patch_shape, **kwargs
114    )

Get the MicroUSP dataset for segmentation of prostate in micro-ultrasound scans.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
patch_shape: The patch shape to use for training.
split: The choice of data split.
resize_inputs: Whether to resize the inputs to the patch shape.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_micro_usp_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_micro_usp_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the MicroUSP dataloader for prostate segmentation in micro-ultrasound scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments into those for the dataset and those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_micro_usp_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)

Get the MicroUSP dataloader for segmentation of prostate in micro-ultrasound scans.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
batch_size: The batch size for training.
patch_shape: The patch shape to use for training.
split: The choice of data split.
resize_inputs: Whether to resize the inputs to the patch shape.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.