torch_em.data.datasets.medical.covid19_seg
The Covid19Seg dataset contains annotations for lung and covid infection in CT scans.
This dataset is located at https://doi.org/10.5281/zenodo.3757476. The dataset is from the publication https://doi.org/10.1002/mp.14676. Please cite it if you use this dataset for your research.
"""The Covid19Seg dataset contains annotations for lung and covid infection in CT scans.

This dataset is located at https://doi.org/10.5281/zenodo.3757476.
The dataset is from the publication https://doi.org/10.1002/mp.14676.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from pathlib import Path
from natsort import natsorted
from typing import Union, Tuple, Optional, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


# Download location of each archive, keyed by "images" or by the label task.
URL = {
    "images": "https://zenodo.org/records/3757476/files/COVID-19-CT-Seg_20cases.zip",
    "lung_and_infection": "https://zenodo.org/records/3757476/files/Lung_and_Infection_Mask.zip",
    "lung": "https://zenodo.org/records/3757476/files/Lung_Mask.zip",
    "infection": "https://zenodo.org/records/3757476/files/Infection_Mask.zip"
}

# Checksums handed to `util.download_source` for the corresponding archive.
CHECKSUM = {
    "images": "a5060480eff9315b069b086312dac4872777901fb80d268a5a83edd9f4e7b440",
    "lung_and_infection": "34f5a573cb8fb53cb15abe81868395d9addf436854826a6fd6e70c2b294f19c3",
    "lung": "f060b0d0299939a6d95ddefdbfa281de1a779c4d230a5adbd32414711d6d8187",
    "infection": "87901c73fdd2230260e61d2dbc57bf56026efc28264006b8ea2bf411453c1694"
}

# Local file name of each archive; its stem is also used as the extraction folder name.
ZIP_FNAMES = {
    "images": "COVID-19-CT-Seg_20cases.zip",
    "lung_and_infection": "Lung_and_Infection_Mask.zip",
    "lung": "Lung_Mask.zip",
    "infection": "Infection_Mask.zip"
}


def get_covid19_seg_data(
    path: Union[os.PathLike, str],
    task: Literal['lung', 'infection', 'lung_and_infection'],
    download: bool = False
) -> Tuple[str, str]:
    """Download the Covid19Seg dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        task: The choice of labels for the specific task.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the image data is downloaded.
        Filepath where the label data is downloaded.
    """
    im_dir = os.path.join(path, "images", Path(ZIP_FNAMES["images"]).stem)
    gt_dir = os.path.join(path, "gt", Path(ZIP_FNAMES[task]).stem)

    # Nothing to do if both the images and the labels for this task are already extracted.
    if os.path.exists(im_dir) and os.path.exists(gt_dir):
        return im_dir, gt_dir

    os.makedirs(path, exist_ok=True)

    im_zip_path = os.path.join(path, ZIP_FNAMES["images"])
    gt_zip_path = os.path.join(path, ZIP_FNAMES[task])

    # download the images (the image archive is kept on disk after extraction)
    util.download_source(path=im_zip_path, url=URL["images"], download=download, checksum=CHECKSUM["images"])
    util.unzip(zip_path=im_zip_path, dst=im_dir, remove=False)

    # download the labels
    util.download_source(path=gt_zip_path, url=URL[task], download=download, checksum=CHECKSUM[task])
    util.unzip(zip_path=gt_zip_path, dst=gt_dir)

    return im_dir, gt_dir


def get_covid19_seg_paths(
    path: Union[os.PathLike, str],
    task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the Covid19Seg data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        task: The choice of labels for the specific task. If None, 'lung_and_infection' is used.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    if task is None:
        task = "lung_and_infection"
    else:
        assert task in ["lung", "infection", "lung_and_infection"], f"{task} is not a valid task."

    image_dir, gt_dir = get_covid19_seg_data(path, task, download)
    image_paths = natsorted(glob(os.path.join(image_dir, "*.nii.gz")))
    gt_paths = natsorted(glob(os.path.join(gt_dir, "*.nii.gz")))
    return image_paths, gt_paths


def get_covid19_seg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the Covid19Seg dataset for lung and covid infection segmentation in CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        task: The choice of labels for the specific task. If None, 'lung_and_infection' is used.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_covid19_seg_paths(path, task, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key="data",
        label_paths=gt_paths,
        label_key="data",
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )


def get_covid19_seg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None,
    download: bool = False,
    resize_inputs: bool = False,
    **kwargs
) -> DataLoader:
    """Get the Covid19Seg dataloader for lung and covid infection segmentation in CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        task: The choice of labels for the specific task. If None, 'lung_and_infection' is used.
        download: Whether to download the data if it is not present.
        resize_inputs: Whether to resize the inputs to the patch shape.
            It is placed after `download` so that existing positional calls keep their meaning.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    # Forward by keyword: the dataset function takes `resize_inputs` *before* `download`,
    # so a positional `download` would be misread as `resize_inputs`.
    dataset = get_covid19_seg_dataset(
        path, patch_shape, task=task, resize_inputs=resize_inputs, download=download, **ds_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
# Download location of each archive, keyed by "images" or by the label task.
URL = {
    "images": "https://zenodo.org/records/3757476/files/COVID-19-CT-Seg_20cases.zip",
    "lung_and_infection": "https://zenodo.org/records/3757476/files/Lung_and_Infection_Mask.zip",
    "lung": "https://zenodo.org/records/3757476/files/Lung_Mask.zip",
    "infection": "https://zenodo.org/records/3757476/files/Infection_Mask.zip",
}

# Checksum of each archive, using the same keys as URL.
CHECKSUM = {
    "images": "a5060480eff9315b069b086312dac4872777901fb80d268a5a83edd9f4e7b440",
    "lung_and_infection": "34f5a573cb8fb53cb15abe81868395d9addf436854826a6fd6e70c2b294f19c3",
    "lung": "f060b0d0299939a6d95ddefdbfa281de1a779c4d230a5adbd32414711d6d8187",
    "infection": "87901c73fdd2230260e61d2dbc57bf56026efc28264006b8ea2bf411453c1694",
}

# File name of each archive, using the same keys as URL.
ZIP_FNAMES = {
    "images": "COVID-19-CT-Seg_20cases.zip",
    "lung_and_infection": "Lung_and_Infection_Mask.zip",
    "lung": "Lung_Mask.zip",
    "infection": "Infection_Mask.zip",
}
def
get_covid19_seg_data( path: Union[os.PathLike, str], task: Literal['lung', 'infection', 'lung_and_infection'], download: bool = False) -> Tuple[str, str]:
def get_covid19_seg_data(
    path: Union[os.PathLike, str],
    task: Literal['lung', 'infection', 'lung_and_infection'],
    download: bool = False
) -> Tuple[str, str]:
    """Fetch and extract the Covid19Seg images and the labels of one task.

    If both extraction folders already exist, nothing is downloaded or extracted.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        task: The choice of labels for the specific task.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the image data is downloaded.
        Filepath where the label data is downloaded.
    """
    image_zip_name = ZIP_FNAMES["images"]
    image_folder = os.path.join(path, "images", Path(image_zip_name).stem)

    label_zip_name = ZIP_FNAMES[task]
    label_folder = os.path.join(path, "gt", Path(label_zip_name).stem)

    already_extracted = os.path.exists(image_folder) and os.path.exists(label_folder)
    if not already_extracted:
        os.makedirs(path, exist_ok=True)

        image_zip = os.path.join(path, image_zip_name)
        label_zip = os.path.join(path, label_zip_name)

        # Images first; their archive is explicitly kept on disk after extraction.
        util.download_source(
            path=image_zip, url=URL["images"], download=download, checksum=CHECKSUM["images"]
        )
        util.unzip(zip_path=image_zip, dst=image_folder, remove=False)

        # Then the labels that belong to the requested task.
        util.download_source(
            path=label_zip, url=URL[task], download=download, checksum=CHECKSUM[task]
        )
        util.unzip(zip_path=label_zip, dst=label_folder)

    return image_folder, label_folder
Download the Covid19Seg dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- task: The choice of labels for the specific task: 'lung', 'infection' or 'lung_and_infection'.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the image data is downloaded. Filepath where the label data is downloaded.
def
get_covid19_seg_paths( path: Union[os.PathLike, str], task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_covid19_seg_paths(
    path: Union[os.PathLike, str],
    task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Collect the image and label volumes of the Covid19Seg data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        task: The choice of labels for the specific task. If None, 'lung_and_infection' is used.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    # Fall back to the combined labels when no task is given; the default always passes the check.
    task = "lung_and_infection" if task is None else task
    assert task in ["lung", "infection", "lung_and_infection"], f"{task} is not a valid task."

    def _volumes_in(folder):
        # All NIfTI volumes directly inside `folder`, in natural sort order.
        return natsorted(glob(os.path.join(folder, "*.nii.gz")))

    image_folder, label_folder = get_covid19_seg_data(path, task, download)
    raw_volumes = _volumes_in(image_folder)
    label_volumes = _volumes_in(label_folder)
    return raw_volumes, label_volumes
Get paths to the Covid19Seg data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- task: The choice of labels for the specific task: 'lung', 'infection' or 'lung_and_infection'. If None, 'lung_and_infection' is used.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_covid19_seg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_covid19_seg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the Covid19Seg dataset for lung and covid infection segmentation in CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        task: The choice of labels for the specific task.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_volumes, label_volumes = get_covid19_seg_paths(path, task, download)

    if resize_inputs:
        # The helper returns the updated dataset kwargs and the patch shape to use from here on.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs=dict(patch_shape=patch_shape, is_rgb=False),
        )

    # Images and labels are both read with the key "data".
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_volumes,
        raw_key="data",
        label_paths=label_volumes,
        label_key="data",
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )
    return dataset
Get the Covid19Seg dataset for lung and covid infection segmentation in CT scans.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- task: The choice of labels for the specific task: 'lung', 'infection' or 'lung_and_infection'. If None, 'lung_and_infection' is used.
- resize_inputs: Whether to resize the inputs to the patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_covid19_seg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_covid19_seg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None,
    download: bool = False,
    resize_inputs: bool = False,
    **kwargs
) -> DataLoader:
    """Get the Covid19Seg dataloader for lung and covid infection segmentation in CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        task: The choice of labels for the specific task.
        download: Whether to download the data if it is not present.
        resize_inputs: Whether to resize the inputs to the patch shape.
            It is placed after `download` so that existing positional calls keep their meaning.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments of the dataset from the remaining ones, which go to the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    # Forward by keyword: `get_covid19_seg_dataset` takes `resize_inputs` *before* `download`,
    # so passing `download` positionally would set `resize_inputs` and leave `download` at False.
    dataset = get_covid19_seg_dataset(
        path, patch_shape, task=task, resize_inputs=resize_inputs, download=download, **ds_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
Get the Covid19Seg dataloader for lung and covid infection segmentation in CT scans.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- task: The choice of labels for the specific task: 'lung', 'infection' or 'lung_and_infection'. If None, 'lung_and_infection' is used.
- resize_inputs: Whether to resize the inputs to the patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.