torch_em.data.datasets.medical.covid_qu_ex

The COVID QU EX dataset contains annotations for segmentations of lung and infection in X-Ray images.

The dataset is located at https://www.kaggle.com/datasets/anasmohammedtahir/covidqu. This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2021.104319. Please cite them if you use this dataset for your research.

  1"""The COVID QU EX dataset contains annotations for segmentations of
  2lung and infection in X-Ray images.
  3
  4The dataset is located at https://www.kaggle.com/datasets/anasmohammedtahir/covidqu.
  5This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2021.104319.
  6Please cite them if you use this dataset for your research.
  7"""
  8
  9import os
 10from glob import glob
 11from natsort import natsorted
 12from typing import Tuple, Union, Optional, Literal, List
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
 21def get_covid_qu_ex_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 22    """Download the COVID QU EX dataset.
 23
 24    Args:
 25        path: Filepath to a folder where the data is downloaded for further processing.
 26        download: Whether to download the data if it is not present.
 27
 28    Returns:
 29        Filepath where the data is downlaoded.
 30    """
 31    data_dir = os.path.join(path, "data")
 32    if os.path.exists(data_dir):
 33        return data_dir
 34
 35    os.makedirs(path, exist_ok=True)
 36
 37    util.download_source_kaggle(path=path, dataset_name="anasmohammedtahir/covidqu", download=download)
 38    zip_path = os.path.join(path, "covidqu.zip")
 39    util.unzip(zip_path=zip_path, dst=data_dir)
 40
 41    return data_dir
 42
 43
 44def get_covid_qu_ex_paths(
 45    path: Union[os.PathLike, str],
 46    split: Literal['train', 'val', 'test'],
 47    task: Literal['lung', 'infection'],
 48    patient_type: Optional[Literal['covid19', 'non-covid', 'normal']] = None,
 49    segmentation_mask: Literal['lung', 'infection'] = "lung",
 50    download: bool = False
 51) -> Tuple[List[str], List[str]]:
 52    """Get paths to the COVID QU EX data.
 53
 54    Args:
 55        path: Filepath to a folder where the data is downloaded for further processing.
 56        split: The data split to use. Either 'train', 'val' or 'test'.
 57        task: The choice for the subset of dataset. Either 'lung' or 'infection'.
 58        patient_type: The choice of subset of patients. Either 'covid19', 'non-covid' or 'normal'.
 59            By default is None, i.e. all the patient data will be chosen.
 60        segmentation_mask: The choice of segmentation labels. Either 'lung' or 'infection'.
 61        download: Whether to download the data if it is not present.
 62
 63    Returns:
 64        List of filepaths for the image data.
 65        List of filepaths for the label data.
 66    """
 67    data_dir = get_covid_qu_ex_data(path=path, download=download)
 68
 69    assert split.lower() in ["train", "val", "test"], f"'{split}' is not a valid split."
 70
 71    if task == "lung":
 72        _task = r"Lung Segmentation Data/Lung Segmentation Data"
 73    elif task == "infection":
 74        _task = r"Infection Segmentation Data/Infection Segmentation Data"
 75    else:
 76        raise ValueError(f"'{task}' is not a valid task.")
 77
 78    if patient_type == "covid19":
 79        patient_type = "COVID-19"
 80    elif patient_type == "non-covid":
 81        patient_type = "Non-COVID"
 82    elif patient_type == "normal":
 83        patient_type = "Normal"
 84    else:
 85        if patient_type is None:
 86            patient_type = "*"
 87        else:
 88            raise ValueError(f"'{patient_type}' is not a valid patient type.")
 89
 90    base_dir = os.path.join(data_dir, _task, split.title(), patient_type)
 91
 92    if segmentation_mask == "lung":
 93        segmentation_mask = r"lung masks"
 94    elif segmentation_mask == "infection":
 95        if task == "lung":
 96            raise AssertionError("The 'lung' data subset does not have infection masks.")
 97        segmentation_mask = r"infection masks"
 98    else:
 99        if segmentation_mask is None:
100            segmentation_mask = "*"
101        else:
102            raise ValueError(f"'{segmentation_mask}' is not a valid segmentation task.")
103
104    image_paths = natsorted(glob(os.path.join(base_dir, "images", "*")))
105    gt_paths = natsorted(glob(os.path.join(base_dir, segmentation_mask, "*")))
106
107    return image_paths, gt_paths
108
109
def get_covid_qu_ex_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    task: Literal['lung', 'infection'],
    patient_type: Optional[Literal['covid19', 'non-covid', 'normal']] = None,
    segmentation_mask: Literal['lung', 'infection'] = "lung",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the COVID QU EX segmentation dataset for lung or infection masks.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        task: The choice for the subset of dataset. Either 'lung' or 'infection'.
        patient_type: The choice of subset of patients. Either 'covid19', 'non-covid' or 'normal'.
            By default is None, i.e. all the patient data will be chosen.
        segmentation_mask: The choice of segmentation labels. Either 'lung' or 'infection'.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_covid_qu_ex_paths(
        path=path,
        split=split,
        task=task,
        patient_type=patient_type,
        segmentation_mask=segmentation_mask,
        download=download,
    )

    if resize_inputs:
        # The helper returns updated dataset kwargs and the patch shape to use afterwards.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    # The data is addressed by file path only, so no internal keys are passed.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
154
155
def get_covid_qu_ex_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    task: Literal['lung', 'infection'],
    patient_type: Optional[Literal['covid19', 'non-covid', 'normal']] = None,
    segmentation_mask: Literal['lung', 'infection'] = "lung",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the COVID QU EX dataloader for lung or infection segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        task: The choice for the subset of dataset. Either 'lung' or 'infection'.
        patient_type: The choice of subset of patients. Either 'covid19', 'non-covid' or 'normal'.
            By default is None, i.e. all the patient data will be chosen.
        segmentation_mask: The choice of segmentation labels. Either 'lung' or 'infection'.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the remaining ones for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_covid_qu_ex_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        task=task,
        patient_type=patient_type,
        segmentation_mask=segmentation_mask,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)
def get_covid_qu_ex_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_covid_qu_ex_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Fetch and unpack the COVID QU EX dataset if it is not available yet.

    The data is expected in the subfolder `data` of `path`. If that subfolder already exists
    it is returned right away and nothing is downloaded or extracted.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the folder where the data is downloaded and extracted.
    """
    extracted_dir = os.path.join(path, "data")

    # Only fetch and unpack when the extraction folder is missing.
    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)

        # The kaggle archive is expected as 'covidqu.zip' directly inside `path`.
        util.download_source_kaggle(path=path, dataset_name="anasmohammedtahir/covidqu", download=download)
        util.unzip(zip_path=os.path.join(path, "covidqu.zip"), dst=extracted_dir)

    return extracted_dir

Download the COVID QU EX dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def get_covid_qu_ex_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], task: Literal['lung', 'infection'], patient_type: Optional[Literal['covid19', 'non-covid', 'normal']] = None, segmentation_mask: Literal['lung', 'infection'] = 'lung', download: bool = False) -> Tuple[List[str], List[str]]:
 45def get_covid_qu_ex_paths(
 46    path: Union[os.PathLike, str],
 47    split: Literal['train', 'val', 'test'],
 48    task: Literal['lung', 'infection'],
 49    patient_type: Optional[Literal['covid19', 'non-covid', 'normal']] = None,
 50    segmentation_mask: Literal['lung', 'infection'] = "lung",
 51    download: bool = False
 52) -> Tuple[List[str], List[str]]:
 53    """Get paths to the COVID QU EX data.
 54
 55    Args:
 56        path: Filepath to a folder where the data is downloaded for further processing.
 57        split: The data split to use. Either 'train', 'val' or 'test'.
 58        task: The choice for the subset of dataset. Either 'lung' or 'infection'.
 59        patient_type: The choice of subset of patients. Either 'covid19', 'non-covid' or 'normal'.
 60            By default is None, i.e. all the patient data will be chosen.
 61        segmentation_mask: The choice of segmentation labels. Either 'lung' or 'infection'.
 62        download: Whether to download the data if it is not present.
 63
 64    Returns:
 65        List of filepaths for the image data.
 66        List of filepaths for the label data.
 67    """
 68    data_dir = get_covid_qu_ex_data(path=path, download=download)
 69
 70    assert split.lower() in ["train", "val", "test"], f"'{split}' is not a valid split."
 71
 72    if task == "lung":
 73        _task = r"Lung Segmentation Data/Lung Segmentation Data"
 74    elif task == "infection":
 75        _task = r"Infection Segmentation Data/Infection Segmentation Data"
 76    else:
 77        raise ValueError(f"'{task}' is not a valid task.")
 78
 79    if patient_type == "covid19":
 80        patient_type = "COVID-19"
 81    elif patient_type == "non-covid":
 82        patient_type = "Non-COVID"
 83    elif patient_type == "normal":
 84        patient_type = "Normal"
 85    else:
 86        if patient_type is None:
 87            patient_type = "*"
 88        else:
 89            raise ValueError(f"'{patient_type}' is not a valid patient type.")
 90
 91    base_dir = os.path.join(data_dir, _task, split.title(), patient_type)
 92
 93    if segmentation_mask == "lung":
 94        segmentation_mask = r"lung masks"
 95    elif segmentation_mask == "infection":
 96        if task == "lung":
 97            raise AssertionError("The 'lung' data subset does not have infection masks.")
 98        segmentation_mask = r"infection masks"
 99    else:
100        if segmentation_mask is None:
101            segmentation_mask = "*"
102        else:
103            raise ValueError(f"'{segmentation_mask}' is not a valid segmentation task.")
104
105    image_paths = natsorted(glob(os.path.join(base_dir, "images", "*")))
106    gt_paths = natsorted(glob(os.path.join(base_dir, segmentation_mask, "*")))
107
108    return image_paths, gt_paths

Get paths to the COVID QU EX data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • task: The choice for the subset of dataset. Either 'lung' or 'infection'.
  • patient_type: The choice of subset of patients. Either 'covid19', 'non-covid' or 'normal'. By default is None, i.e. all the patient data will be chosen.
  • segmentation_mask: The choice of segmentation labels. Either 'lung' or 'infection'.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_covid_qu_ex_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], task: Literal['lung', 'infection'], patient_type: Optional[Literal['covid19', 'non-covid', 'normal']] = None, segmentation_mask: Literal['lung', 'infection'] = 'lung', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_covid_qu_ex_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    task: Literal['lung', 'infection'],
    patient_type: Optional[Literal['covid19', 'non-covid', 'normal']] = None,
    segmentation_mask: Literal['lung', 'infection'] = "lung",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the COVID QU EX segmentation dataset for lung or infection masks.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        task: The choice for the subset of dataset. Either 'lung' or 'infection'.
        patient_type: The choice of subset of patients. Either 'covid19', 'non-covid' or 'normal'.
            By default is None, i.e. all the patient data will be chosen.
        segmentation_mask: The choice of segmentation labels. Either 'lung' or 'infection'.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_covid_qu_ex_paths(
        path=path,
        split=split,
        task=task,
        patient_type=patient_type,
        segmentation_mask=segmentation_mask,
        download=download,
    )

    if resize_inputs:
        # The helper returns updated dataset kwargs and the patch shape to use afterwards.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    # The data is addressed by file path only, so no internal keys are passed.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset

Get the COVID QU EX dataset for lung and infection segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • task: The choice for the subset of dataset. Either 'lung' or 'infection'.
  • patient_type: The choice of subset of patients. Either 'covid19', 'non-covid' or 'normal'. By default is None, i.e. all the patient data will be chosen.
  • segmentation_mask: The choice of segmentation labels. Either 'lung' or 'infection'.
  • resize_inputs: Whether to resize the inputs to the patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_covid_qu_ex_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], task: Literal['lung', 'infection'], patient_type: Optional[Literal['covid19', 'non-covid', 'normal']] = None, segmentation_mask: Literal['lung', 'infection'] = 'lung', resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_covid_qu_ex_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    task: Literal['lung', 'infection'],
    patient_type: Optional[Literal['covid19', 'non-covid', 'normal']] = None,
    segmentation_mask: Literal['lung', 'infection'] = "lung",
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the COVID QU EX dataloader for lung or infection segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        task: The choice for the subset of dataset. Either 'lung' or 'infection'.
        patient_type: The choice of subset of patients. Either 'covid19', 'non-covid' or 'normal'.
            By default is None, i.e. all the patient data will be chosen.
        segmentation_mask: The choice of segmentation labels. Either 'lung' or 'infection'.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the remaining ones for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_covid_qu_ex_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        task=task,
        patient_type=patient_type,
        segmentation_mask=segmentation_mask,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)

Get the COVID QU EX dataloader for lung and infection segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • task: The choice for the subset of dataset. Either 'lung' or 'infection'.
  • patient_type: The choice of subset of patients. Either 'covid19', 'non-covid' or 'normal'. By default is None, i.e. all the patient data will be chosen.
  • segmentation_mask: The choice of segmentation labels. Either 'lung' or 'infection'.
  • resize_inputs: Whether to resize the inputs to the patch shape.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.