torch_em.data.datasets.medical.psfhs

The PSFHS dataset contains annotations for segmentation of pubic symphysis and fetal head in ultrasound images.

This dataset is located at https://zenodo.org/records/10969427. The dataset is from the publication https://doi.org/10.1038/s41597-024-03266-4. Please cite it if you use this dataset for your research.

View Source

  1"""The PSFHS dataset contains annotations for segmentation of pubic symphysis and fetal head
  2in ultrasound images.
  3
  4This dataset is located at https://zenodo.org/records/10969427.
  5The dataset is from the publication https://doi.org/10.1038/s41597-024-03266-4.
  6Please cite it if you use this dataset for your research.
  7"""
  8
  9import os
 10from glob import glob
 11from natsort import natsorted
 12from typing import Union, Tuple, Literal, List
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
# Download location of the zipped PSFHS data (hosted on Zenodo).
URL = "https://zenodo.org/records/10969427/files/PSFHS.zip"
# Checksum handed to `util.download_source` together with the URL.
# NOTE(review): 64 hex digits, presumably SHA-256 - confirm against `util.download_source`.
CHECKSUM = "3f4a8126c84640e4d1b8a4e296d0dfd599cea6529b64b9ee00e5489bfd17ea95"
 23
 24
 25def get_psfhs_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 26    """Download the PSFHS data.
 27
 28    Args:
 29        path: Filepath to a folder where the data is downloaded for further processing.
 30        download: Whether to download the data if it is not present.
 31
 32    Returns:
 33        Filepath where the data is downloaded.
 34    """
 35    data_dir = os.path.join(path, "PSFHS")
 36    if os.path.exists(data_dir):
 37        return data_dir
 38
 39    os.makedirs(path, exist_ok=True)
 40
 41    zip_path = os.path.join(path, "PSFHS.zip")
 42    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 43    util.unzip(zip_path=zip_path, dst=path)
 44
 45    return data_dir
 46
 47
 48def get_psfhs_paths(
 49    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
 50) -> Tuple[List[int], List[int]]:
 51    """Get paths to the PSFHS dataset.
 52
 53    Args:
 54        path: Filepath to a folder where the data is downloaded for further processing.
 55        split: The choice of data split.
 56        download: Whether to download the data if it is not present.
 57
 58    Returns:
 59        List of filepaths for the image data.
 60        List of filepaths for the label data.
 61    """
 62    data_dir = get_psfhs_data(path, download)
 63
 64    raw_paths = natsorted(glob(os.path.join(data_dir, "image_mha", "*.mha")))
 65    label_paths = natsorted(glob(os.path.join(data_dir, "label_mha", "*.mha")))
 66
 67    if split == "train":
 68        raw_paths, label_paths = raw_paths[:900], label_paths[:900]
 69    elif split == "val":
 70        raw_paths, label_paths = raw_paths[900:1050], label_paths[900:1050]
 71    elif split == "test":
 72        raw_paths, label_paths = raw_paths[1050:], label_paths[1050:]
 73    else:
 74        raise ValueError(f"'{split}' is not a valid split.")
 75
 76    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0
 77
 78    return raw_paths, label_paths
 79
 80
 81def get_psfhs_dataset(
 82    path: Union[os.PathLike, str],
 83    patch_shape: Tuple[int, int],
 84    split: Literal['train', 'val', 'test'],
 85    resize_inputs: bool = False,
 86    download: bool = False,
 87    **kwargs
 88) -> Dataset:
 89    """Get the PSFHS dataset for segmentation of pubic symphysis and fetal head.
 90
 91    Args:
 92        path: Filepath to a folder where the data is downloaded for further processing.
 93        patch_shape: The patch shape to use for training.
 94        split: The choice of data split.
 95        resize_inputs: Whether to resize the inputs to the patch shape.
 96        download: Whether to download the data if it is not present.
 97        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 98
 99    Returns:
100        The segmentation dataset.
101    """
102    raw_paths, label_paths = get_psfhs_paths(path, split, download)
103
104    if resize_inputs:
105        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
106        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
107            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
108        )
109
110    return torch_em.default_segmentation_dataset(
111        raw_paths=raw_paths,
112        raw_key=None,
113        label_paths=label_paths,
114        label_key=None,
115        ndim=2,
116        is_seg_dataset=False,
117        with_channels=True,
118        patch_shape=patch_shape,
119        **kwargs
120    )
121
122
def get_psfhs_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the PSFHS data loader for segmentation of pubic symphysis and fetal head.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments. They are split into arguments for
            `torch_em.default_segmentation_dataset` and arguments that are passed
            on to `torch_em.get_data_loader`.

    Returns:
        The data loader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones.
    dataset_options, loader_options = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    psfhs_dataset = get_psfhs_dataset(
        path, patch_shape, split, resize_inputs, download, **dataset_options
    )

    return torch_em.get_data_loader(psfhs_dataset, batch_size, **loader_options)

URL = 'https://zenodo.org/records/10969427/files/PSFHS.zip'

CHECKSUM = '3f4a8126c84640e4d1b8a4e296d0dfd599cea6529b64b9ee00e5489bfd17ea95'

def get_psfhs_data(path: Union[os.PathLike, str], download: bool = False) -> str: View Source

def get_psfhs_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the PSFHS data is available locally and return its location.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath to the `PSFHS` folder inside `path`.
    """
    extracted_root = os.path.join(path, "PSFHS")

    # Only fetch and unpack the archive when the extracted folder is missing.
    if not os.path.exists(extracted_root):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "PSFHS.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return extracted_root

Download the PSFHS data.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
download: Whether to download the data if it is not present.

Returns:

Filepath where the data is downloaded.

def get_psfhs_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[int], List[int]]: View Source

def get_psfhs_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the PSFHS dataset.

    The image and label files are sorted naturally and divided by position:
    the first 900 files form the 'train' split, the next 150 the 'val' split
    and all remaining files the 'test' split.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split. One of 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is not one of the supported choices.
        AssertionError: If the number of image and label files differs,
            or if the selected split contains no files.
    """
    # Reject an invalid split before touching the data, so that a typo
    # never triggers a download first.
    if split not in ("train", "val", "test"):
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_psfhs_data(path, download)

    raw_paths = natsorted(glob(os.path.join(data_dir, "image_mha", "*.mha")))
    label_paths = natsorted(glob(os.path.join(data_dir, "label_mha", "*.mha")))

    # Compare the full listings: after slicing, the 'train' and 'val' parts can
    # have equal lengths even though the totals differ.
    # Raised explicitly (instead of `assert`) so the check survives `python -O`,
    # while keeping the exception type of the original check.
    if len(raw_paths) != len(label_paths):
        raise AssertionError(
            f"Found {len(raw_paths)} images but {len(label_paths)} labels in '{data_dir}'."
        )

    split_ranges = {"train": slice(None, 900), "val": slice(900, 1050), "test": slice(1050, None)}
    selection = split_ranges[split]
    raw_paths, label_paths = raw_paths[selection], label_paths[selection]

    if not raw_paths:
        raise AssertionError(f"No files found for the '{split}' split in '{data_dir}'.")

    return raw_paths, label_paths

Get paths to the PSFHS dataset.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
split: The choice of data split.
download: Whether to download the data if it is not present.

Returns:

A tuple of two lists: the filepaths for the image data and the filepaths for the label data.

def get_psfhs_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

 82def get_psfhs_dataset(
 83    path: Union[os.PathLike, str],
 84    patch_shape: Tuple[int, int],
 85    split: Literal['train', 'val', 'test'],
 86    resize_inputs: bool = False,
 87    download: bool = False,
 88    **kwargs
 89) -> Dataset:
 90    """Get the PSFHS dataset for segmentation of pubic symphysis and fetal head.
 91
 92    Args:
 93        path: Filepath to a folder where the data is downloaded for further processing.
 94        patch_shape: The patch shape to use for training.
 95        split: The choice of data split.
 96        resize_inputs: Whether to resize the inputs to the patch shape.
 97        download: Whether to download the data if it is not present.
 98        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 99
100    Returns:
101        The segmentation dataset.
102    """
103    raw_paths, label_paths = get_psfhs_paths(path, split, download)
104
105    if resize_inputs:
106        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
107        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
108            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
109        )
110
111    return torch_em.default_segmentation_dataset(
112        raw_paths=raw_paths,
113        raw_key=None,
114        label_paths=label_paths,
115        label_key=None,
116        ndim=2,
117        is_seg_dataset=False,
118        with_channels=True,
119        patch_shape=patch_shape,
120        **kwargs
121    )

Get the PSFHS dataset for segmentation of pubic symphysis and fetal head.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
patch_shape: The patch shape to use for training.
split: The choice of data split.
resize_inputs: Whether to resize the inputs to the patch shape.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_psfhs_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_psfhs_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the PSFHS data loader for segmentation of pubic symphysis and fetal head.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments. They are split into arguments for
            `torch_em.default_segmentation_dataset` and arguments that are passed
            on to `torch_em.get_data_loader`.

    Returns:
        The data loader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones.
    dataset_options, loader_options = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    psfhs_dataset = get_psfhs_dataset(
        path, patch_shape, split, resize_inputs, download, **dataset_options
    )

    return torch_em.get_data_loader(psfhs_dataset, batch_size, **loader_options)

Get the PSFHS data loader for segmentation of pubic symphysis and fetal head.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
batch_size: The batch size for training.
patch_shape: The patch shape to use for training.
split: The choice of data split.
download: Whether to download the data if it is not present.
resize_inputs: Whether to resize the inputs to the patch shape.
kwargs: Additional keyword arguments. They are split into arguments for torch_em.default_segmentation_dataset and arguments that are passed on to torch_em.get_data_loader.

Returns:

The data loader.