torch_em.data.datasets.medical.mbh_seg
The MBH Seg dataset contains annotations for intracranial hemorrhages in non-contrast CT scans.
This dataset is from the MBH-Seg challenge: https://mbh-seg.com
- original scans: https://kaggle.com/competitions/rsna-intracranial-hemorrhage-detection
Please cite these if you use this dataset for your publication.
"""The MBH Seg dataset contains annotations for intracranial hemorrhages
in non-contrast CT scans.

This dataset is from the MBH-Seg challenge: https://mbh-seg.com
- original scans: https://kaggle.com/competitions/rsna-intracranial-hemorrhage-detection
Please cite these if you use this dataset for your publication.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


# Archive that is fetched by `get_mbh_seg_data`, and the checksum handed to the download helper.
URL = "https://huggingface.co/datasets/WuBiao/BHSD/resolve/main/label_192.zip"
CHECKSUM = "582bf184af993541a4958a4d209a6a44e3bbe702a5daefaf9fb1733a4e7a6e39"


def get_mbh_seg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the MBH Seg dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    data_dir = os.path.join(path, "label_192")
    # The extracted folder doubles as the "already done" marker:
    # when it exists, nothing is downloaded or unpacked again.
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "label_192.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def get_mbh_seg_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the MBH Seg data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_mbh_seg_data(path=path, download=download)
    # Both lists are naturally sorted; they are later passed side by side as raw and label paths.
    image_paths = natsorted(glob(os.path.join(data_dir, "images", "*.nii.gz")))
    gt_paths = natsorted(glob(os.path.join(data_dir, "ground truths", "*.nii.gz")))
    return image_paths, gt_paths


def get_mbh_seg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the MBH Seg dataset for intracranial hemorrhage segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_mbh_seg_paths(path=path, download=download)

    if resize_inputs:
        # The helper returns updated kwargs and a (possibly changed) patch shape.
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key="data",
        label_paths=gt_paths,
        label_key="data",
        patch_shape=patch_shape,
        **kwargs
    )


def get_mbh_seg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the MBH Seg dataloader for intracranial hemorrhage segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_mbh_seg_dataset(path, patch_shape, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://huggingface.co/datasets/WuBiao/BHSD/resolve/main/label_192.zip'
CHECKSUM =
'582bf184af993541a4958a4d209a6a44e3bbe702a5daefaf9fb1733a4e7a6e39'
def
get_mbh_seg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_mbh_seg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the MBH Seg dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    data_dir = os.path.join(path, "label_192")
    # The extracted folder doubles as the "already done" marker:
    # when it exists, nothing is downloaded or unpacked again.
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "label_192.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir
Download the MBH Seg dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def
get_mbh_seg_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
def get_mbh_seg_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the MBH Seg data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_mbh_seg_data(path=path, download=download)
    # Both lists are naturally sorted the same way.
    # NOTE(review): presumably image and label files share names so the lists line up by index - confirm.
    image_paths = natsorted(glob(os.path.join(data_dir, "images", "*.nii.gz")))
    gt_paths = natsorted(glob(os.path.join(data_dir, "ground truths", "*.nii.gz")))
    return image_paths, gt_paths
Get paths to the MBH Seg data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_mbh_seg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_mbh_seg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the MBH Seg dataset for intracranial hemorrhage segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_mbh_seg_paths(path=path, download=download)

    if resize_inputs:
        # The helper returns updated kwargs and a (possibly changed) patch shape.
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key="data",
        label_paths=gt_paths,
        label_key="data",
        patch_shape=patch_shape,
        **kwargs
    )
Get the MBH Seg dataset for intracranial hemorrhage segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_mbh_seg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_mbh_seg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the MBH Seg dataloader for intracranial hemorrhage segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize inputs to the desired patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_mbh_seg_dataset(path, patch_shape, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
Get the MBH Seg dataloader for intracranial hemorrhage segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- resize_inputs: Whether to resize inputs to the desired patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.