torch_em.data.datasets.medical.mice_tumseg
The Mice TumSeg dataset contains annotations for tumor segmentation in micro-CT scans.
This dataset is from the publication https://doi.org/10.1038/s41597-024-03814-y. Please cite it if you use this dataset for your research.
1"""The Mice TumSeg contains annotations for tumor segmentation in micro-CT scans. 2 3This dataset is from the publication https://doi.org/10.1038/s41597-024-03814-y. 4Please cite it if you use this dataset for your research. 5""" 6 7import os 8from glob import glob 9from natsort import natsorted 10from typing import Union, Tuple, Literal, List 11 12from torch.utils.data import Dataset, DataLoader 13 14import torch_em 15 16from .. import util 17 18 19URL = "https://erda.ku.dk/archives/ba4fcd9bfa0fb581d593297dd43d1fd1/TumSeg%20database.zip" 20CHECKSUM = "1c0567358ec81b9e085434a0362c1c2981b791dc2733931aeb022174ecb80399" 21 22 23def get_mice_tumseg_data(path: Union[os.PathLike, str], download: bool = False) -> str: 24 """Download the Mice TumSeg dataset. 25 26 Args: 27 path: Filepath to a folder where the data is downloaded for further processing. 28 download: Whether to download the data if it is not present. 29 30 Returns: 31 Filepath where the data is downloaded. 32 """ 33 data_dir = os.path.join(path, r"TumSeg database") 34 if os.path.exists(data_dir): 35 return data_dir 36 37 os.makedirs(path, exist_ok=True) 38 39 zip_path = os.path.join(path, "TumSeg_database.zip") 40 util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM) 41 util.unzip(zip_path=zip_path, dst=path) 42 return data_dir 43 44 45def get_mice_tumseg_paths( 46 path: Union[os.PathLike, str], 47 split: Literal['train', 'val', 'test'], 48 rater: Literal["A", "B", "C", "STAPLE"] = "A", 49 download: bool = False 50) -> Tuple[List[str], List[str]]: 51 """Get paths to the Mice TumSeg data. 52 53 Args: 54 path: Filepath to a folder where the data is downloaded for further processing. 55 split: The choice of data split. 56 rater: The choice of annotator. 57 download: Whether to download the data if it is not present. 58 59 Returns: 60 List of filepaths for the image data. 61 List of filepaths for the label data. 
62 """ 63 data_dir = get_mice_tumseg_data(path, download) 64 65 if rater in ["A", "B", "C"]: 66 ann_choice = f"Annotator_{rater}" 67 elif rater == "STAPLE": 68 ann_choice = rater 69 else: 70 raise ValueError(f"'{rater}' is not a valid rater choice.") 71 72 raw_paths = natsorted(glob(os.path.join(data_dir, "Dataset*", "**", "CT*.nii.gz"), recursive=True)) 73 label_paths = natsorted(glob(os.path.join(data_dir, "Dataset*", "**", f"{ann_choice}*.nii.gz"), recursive=True)) 74 75 if split == "train": 76 raw_paths, label_paths = raw_paths[:325], label_paths[:325] 77 elif split == "val": 78 raw_paths, label_paths = raw_paths[325:360], label_paths[325:360] 79 elif split == "test": 80 raw_paths, label_paths = raw_paths[360:], label_paths[360:] 81 else: 82 raise ValueError(f"'{split}' is not a valid split.") 83 84 assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0 85 86 return raw_paths, label_paths 87 88 89def get_mice_tumseg_dataset( 90 path: Union[os.PathLike, str], 91 patch_shape: Tuple[int, ...], 92 split: Literal['train', 'val', 'test'], 93 rater: Literal["A", "B", "C", "STAPLE"] = "A", 94 resize_inputs: bool = False, 95 download: bool = False, 96 **kwargs 97) -> Dataset: 98 """Get the Mice TumSeg dataset for tumor segmentation. 99 100 Args: 101 path: Filepath to a folder where the data is downloaded for further processing. 102 patch_shape: The patch shape to use for training. 103 split: The choice of data split. 104 rater: The choice of annotator. 105 download: Whether to download the data if it is not present. 106 resize_inputs: Whether to resize the inputs. 107 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 108 109 Returns: 110 The segmentation dataset. 
111 """ 112 raw_paths, label_paths = get_mice_tumseg_paths(path, split, rater, download) 113 114 if resize_inputs: 115 resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False} 116 kwargs, patch_shape = util.update_kwargs_for_resize_trafo( 117 kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs 118 ) 119 120 return torch_em.default_segmentation_dataset( 121 raw_paths=raw_paths, 122 raw_key="data", 123 label_paths=label_paths, 124 label_key="data", 125 patch_shape=patch_shape, 126 **kwargs 127 ) 128 129 130def get_mice_tumseg_loader( 131 path: Union[os.PathLike, str], 132 batch_size: int, 133 patch_shape: Tuple[int, ...], 134 split: Literal['train', 'val', 'test'], 135 rater: Literal["A", "B", "C", "STAPLE"] = "A", 136 resize_inputs: bool = False, 137 download: bool = False, 138 **kwargs 139) -> DataLoader: 140 """Get the Mice TumSeg dataloader for tumor segmentation. 141 142 Args: 143 path: Filepath to a folder where the data is downloaded for further processing. 144 batch_size: The batch size for training. 145 patch_shape: The patch shape to use for training. 146 split: The choice of data split. 147 rater: The choice of annotator. 148 resize_inputs: Whether to resize the inputs. 149 download: Whether to download the data if it is not present. 150 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 151 152 Returns: 153 The DataLoader. 154 """ 155 ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) 156 dataset = get_mice_tumseg_dataset(path, patch_shape, split, rater, resize_inputs, download, **ds_kwargs) 157 return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL = 'https://erda.ku.dk/archives/ba4fcd9bfa0fb581d593297dd43d1fd1/TumSeg%20database.zip'
CHECKSUM = '1c0567358ec81b9e085434a0362c1c2981b791dc2733931aeb022174ecb80399'
def get_mice_tumseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_mice_tumseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the Mice TumSeg dataset is available locally.

    If the extracted "TumSeg database" folder already exists inside `path`, it is returned
    right away. Otherwise `path` is created if needed, the archive is requested through
    `util.download_source` (with `URL` and `CHECKSUM`) and unpacked into `path`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the extracted dataset folder.
    """
    extracted_dir = os.path.join(path, "TumSeg database")

    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)

        archive = os.path.join(path, "TumSeg_database.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return extracted_dir
Download the Mice TumSeg dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def get_mice_tumseg_paths(path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], rater: Literal['A', 'B', 'C', 'STAPLE'] = 'A', download: bool = False) -> Tuple[List[str], List[str]]:
def get_mice_tumseg_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'val', 'test'],
    rater: Literal["A", "B", "C", "STAPLE"] = "A",
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the Mice TumSeg data.

    The naturally sorted file lists are divided by position: the first 325 entries form the
    'train' split, the next 35 the 'val' split and all remaining entries the 'test' split.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The choice of data split.
        rater: The choice of annotator.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `rater` or `split` is not one of the supported choices.
        RuntimeError: If the number of images and labels differ, or the requested split is empty.
    """
    # Validate the arguments first, so that a typo does not trigger a download beforehand.
    if rater in ("A", "B", "C"):
        ann_choice = f"Annotator_{rater}"
    elif rater == "STAPLE":
        ann_choice = rater
    else:
        raise ValueError(f"'{rater}' is not a valid rater choice.")

    split_slices = {"train": slice(None, 325), "val": slice(325, 360), "test": slice(360, None)}
    if not isinstance(split, str) or split not in split_slices:
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_mice_tumseg_data(path, download)

    raw_paths = natsorted(glob(os.path.join(data_dir, "Dataset*", "**", "CT*.nii.gz"), recursive=True))
    label_paths = natsorted(glob(os.path.join(data_dir, "Dataset*", "**", f"{ann_choice}*.nii.gz"), recursive=True))

    # Compare the full lists BEFORE slicing: images and labels are paired by position, and
    # equally sized slices would otherwise hide a mismatch in the total number of files.
    if len(raw_paths) != len(label_paths):
        raise RuntimeError(
            f"Found {len(raw_paths)} images but {len(label_paths)} labels for rater '{rater}' in '{data_dir}'."
        )

    selection = split_slices[split]
    raw_paths, label_paths = raw_paths[selection], label_paths[selection]

    # An explicit exception instead of `assert`, which is skipped when running with `python -O`.
    if not raw_paths:
        raise RuntimeError(f"No data found for the '{split}' split in '{data_dir}'.")

    return raw_paths, label_paths
Get paths to the Mice TumSeg data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The choice of data split.
- rater: The choice of annotator.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_mice_tumseg_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], rater: Literal['A', 'B', 'C', 'STAPLE'] = 'A', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_mice_tumseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    rater: Literal["A", "B", "C", "STAPLE"] = "A",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the segmentation dataset for tumor segmentation on Mice TumSeg.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        rater: The choice of annotator.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, annotation_paths = get_mice_tumseg_paths(path, split, rater, download)

    if resize_inputs:
        # The resize helper returns updated dataset kwargs together with the patch shape to use.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key="data",
        label_paths=annotation_paths,
        label_key="data",
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
Get the Mice TumSeg dataset for tumor segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- rater: The choice of annotator.
- download: Whether to download the data if it is not present.
- resize_inputs: Whether to resize the inputs.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_mice_tumseg_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], rater: Literal['A', 'B', 'C', 'STAPLE'] = 'A', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_mice_tumseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    split: Literal['train', 'val', 'test'],
    rater: Literal["A", "B", "C", "STAPLE"] = "A",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the dataloader for tumor segmentation on Mice TumSeg.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        rater: The choice of annotator.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the extra keyword arguments into those for the dataset and those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    tumseg_dataset = get_mice_tumseg_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        rater=rater,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(tumseg_dataset, batch_size, **dataloader_kwargs)
Get the Mice TumSeg dataloader for tumor segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- rater: The choice of annotator.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.