torch_em.data.datasets.medical.mbh_seg

The MBH Seg dataset contains annotations for intracranial hemorrhages in non-contrast CT scans.

This dataset is from the MBH-Seg challenge: https://mbh-seg.com

  1"""The MBH Seg dataset contains annotations for intracranial hemorrhages
  2in non-contrast CT scans.
  3
  4This dataset is from the MBH-Seg challenge: https://mbh-seg.com
  5- original scans: https://kaggle.com/competitions/rsna-intracranial-hemorrhage-detection
  6Please cite these if you use this dataset for your publication.
  7"""
  8
  9import os
 10from glob import glob
 11from natsort import natsorted
 12from typing import Union, Tuple, List
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
# Location of the zipped data ("label_192.zip") that `get_mbh_seg_data` downloads.
URL = "https://huggingface.co/datasets/WuBiao/BHSD/resolve/main/label_192.zip"
# Expected checksum of the archive; it is handed to `util.download_source` together with `URL`.
# NOTE(review): 64 hex characters, presumably a SHA-256 digest - confirm against `util.download_source`.
CHECKSUM = "582bf184af993541a4958a4d209a6a44e3bbe702a5daefaf9fb1733a4e7a6e39"
 23
 24
 25def get_mbh_seg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 26    """Download the MBH Seg dataset.
 27
 28    Args:
 29        path: Filepath to a folder where the data is downloaded for further processing.
 30        download: Whether to download the data if it is not present.
 31
 32    Returns:
 33        Filepath where the data is download.
 34    """
 35    data_dir = os.path.join(path, "label_192")
 36    if os.path.exists(data_dir):
 37        return data_dir
 38
 39    os.makedirs(path, exist_ok=True)
 40
 41    zip_path = os.path.join(path, "label_192.zip")
 42    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 43    util.unzip(zip_path=zip_path, dst=path)
 44
 45    return data_dir
 46
 47
 48def get_mbh_seg_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
 49    """Get paths to the MBH Seg data.
 50
 51    Args:
 52        path: Filepath to a folder where the data is downloaded for further processing.
 53        download: Whether to download the data if it is not present.
 54
 55    Returns:
 56        List of filepaths for the image data.
 57        List of filepaths for the label data.
 58    """
 59    data_dir = get_mbh_seg_data(path=path, download=download)
 60    image_paths = natsorted(glob(os.path.join(data_dir, "images", "*.nii.gz")))
 61    gt_paths = natsorted(glob(os.path.join(data_dir, r"ground truths", "*.nii.gz")))
 62    return image_paths, gt_paths
 63
 64
 65def get_mbh_seg_dataset(
 66    path: Union[os.PathLike, str],
 67    patch_shape: Tuple[int, ...],
 68    resize_inputs: bool = False,
 69    download: bool = False,
 70    **kwargs
 71) -> Dataset:
 72    """Get the MBH Seg dataset for intracranial hemorrhage segmentation.
 73
 74    Args:
 75        path: Filepath to a folder where the data is downloaded for further processing.
 76        patch_shape: The patch shape to use for training.
 77        resize_inputs: Whether to resize inputs to the desired patch shape.
 78        download: Whether to download the data if it is not present.
 79        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 80
 81    Returns:
 82        The segmentation dataset.
 83    """
 84    image_paths, gt_paths = get_mbh_seg_paths(path=path, download=download)
 85
 86    if resize_inputs:
 87        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
 88        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
 89            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
 90        )
 91
 92    return torch_em.default_segmentation_dataset(
 93        raw_paths=image_paths,
 94        raw_key="data",
 95        label_paths=gt_paths,
 96        label_key="data",
 97        patch_shape=patch_shape,
 98        **kwargs
 99    )
100
101
def get_mbh_seg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the MBH Seg dataloader for intracranial hemorrhage segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # `util.split_kwargs` separates the arguments meant for the dataset from the remaining ones,
    # which are forwarded to `torch_em.get_data_loader`.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    mbh_seg_dataset = get_mbh_seg_dataset(
        path=path, patch_shape=patch_shape, resize_inputs=resize_inputs, download=download, **dataset_kwargs
    )
    return torch_em.get_data_loader(mbh_seg_dataset, batch_size, **dataloader_kwargs)
# Location of the zipped data ("label_192.zip") that `get_mbh_seg_data` downloads.
URL = 'https://huggingface.co/datasets/WuBiao/BHSD/resolve/main/label_192.zip'
# Expected checksum of the archive; it is handed to `util.download_source` together with `URL`.
# NOTE(review): 64 hex characters, presumably a SHA-256 digest - confirm against `util.download_source`.
CHECKSUM = '582bf184af993541a4958a4d209a6a44e3bbe702a5daefaf9fb1733a4e7a6e39'
def get_mbh_seg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_mbh_seg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download and unzip the MBH Seg dataset, unless it is already available.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the `label_192` folder inside `path`, where the data is expected after unzipping.
    """
    extracted_dir = os.path.join(path, "label_192")

    # Only fetch and unpack the archive when the extracted folder is missing.
    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)

        archive_path = os.path.join(path, "label_192.zip")
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive_path, dst=path)

    return extracted_dir

Download the MBH Seg dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def get_mbh_seg_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
def get_mbh_seg_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Collect the image and label filepaths of the MBH Seg data.

    The files are looked up as `*.nii.gz` in the `images` and `ground truths`
    subfolders of the data folder, and each list is ordered with `natsorted`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    root = get_mbh_seg_data(path=path, download=download)

    def _volumes_in(subfolder: str) -> List[str]:
        # Both subfolders store their volumes with the same file extension.
        return natsorted(glob(os.path.join(root, subfolder, "*.nii.gz")))

    return _volumes_in("images"), _volumes_in("ground truths")

Get paths to the MBH Seg data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_mbh_seg_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 66def get_mbh_seg_dataset(
 67    path: Union[os.PathLike, str],
 68    patch_shape: Tuple[int, ...],
 69    resize_inputs: bool = False,
 70    download: bool = False,
 71    **kwargs
 72) -> Dataset:
 73    """Get the MBH Seg dataset for intracranial hemorrhage segmentation.
 74
 75    Args:
 76        path: Filepath to a folder where the data is downloaded for further processing.
 77        patch_shape: The patch shape to use for training.
 78        resize_inputs: Whether to resize inputs to the desired patch shape.
 79        download: Whether to download the data if it is not present.
 80        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 81
 82    Returns:
 83        The segmentation dataset.
 84    """
 85    image_paths, gt_paths = get_mbh_seg_paths(path=path, download=download)
 86
 87    if resize_inputs:
 88        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
 89        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
 90            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
 91        )
 92
 93    return torch_em.default_segmentation_dataset(
 94        raw_paths=image_paths,
 95        raw_key="data",
 96        label_paths=gt_paths,
 97        label_key="data",
 98        patch_shape=patch_shape,
 99        **kwargs
100    )

Get the MBH Seg dataset for intracranial hemorrhage segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • resize_inputs: Whether to resize inputs to the desired patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_mbh_seg_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_mbh_seg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the MBH Seg dataloader for intracranial hemorrhage segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # `util.split_kwargs` separates the arguments meant for the dataset from the remaining ones,
    # which are forwarded to `torch_em.get_data_loader`.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    mbh_seg_dataset = get_mbh_seg_dataset(
        path=path, patch_shape=patch_shape, resize_inputs=resize_inputs, download=download, **dataset_kwargs
    )
    return torch_em.get_data_loader(mbh_seg_dataset, batch_size, **dataloader_kwargs)

Get the MBH Seg dataloader for intracranial hemorrhage segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • resize_inputs: Whether to resize inputs to the desired patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.