torch_em.data.datasets.medical.covid19_seg

The Covid19Seg dataset contains annotations for lung and covid infection in CT scans.

This dataset is located at https://doi.org/10.5281/zenodo.3757476. The dataset is from the publication https://doi.org/10.1002/mp.14676. Please cite it if you use this dataset for your research.

View Source

  1"""The Covid19Seg dataset contains annotations for lung and covid infection in CT scans.
  2
  3This dataset is located at https://doi.org/10.5281/zenodo.3757476.
  4The dataset is from the publication https://doi.org/10.1002/mp.14676.
  5Please cite it if you use this dataset for your research.
  6"""
  7
  8import os
  9from glob import glob
 10from pathlib import Path
 11from natsort import natsorted
 12from typing import Union, Tuple, Optional, Literal, List
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
 21URL = {
 22    "images": "https://zenodo.org/records/3757476/files/COVID-19-CT-Seg_20cases.zip",
 23    "lung_and_infection": "https://zenodo.org/records/3757476/files/Lung_and_Infection_Mask.zip",
 24    "lung": "https://zenodo.org/records/3757476/files/Lung_Mask.zip",
 25    "infection": "https://zenodo.org/records/3757476/files/Infection_Mask.zip"
 26}
 27
 28CHECKSUM = {
 29    "images": "a5060480eff9315b069b086312dac4872777901fb80d268a5a83edd9f4e7b440",
 30    "lung_and_infection": "34f5a573cb8fb53cb15abe81868395d9addf436854826a6fd6e70c2b294f19c3",
 31    "lung": "f060b0d0299939a6d95ddefdbfa281de1a779c4d230a5adbd32414711d6d8187",
 32    "infection": "87901c73fdd2230260e61d2dbc57bf56026efc28264006b8ea2bf411453c1694"
 33}
 34
 35ZIP_FNAMES = {
 36    "images": "COVID-19-CT-Seg_20cases.zip",
 37    "lung_and_infection": "Lung_and_Infection_Mask.zip",
 38    "lung": "Lung_Mask.zip",
 39    "infection": "Infection_Mask.zip"
 40}
 41
 42
 43def get_covid19_seg_data(
 44    path: Union[os.PathLike, str],
 45    task: Literal['lung', 'infection', 'lung_and_infection'],
 46    download: bool = False
 47) -> Tuple[str, str]:
 48    """Download the Covid19Seg dataset.
 49
 50    Args:
 51        path: Filepath to a folder where the data is downloaded for further processing.
 52        task: The choice of labels for specific task.
 53        download: Whether to download the data if it is not present.
 54
 55    Returns:
 56        Filepath where the image data is downloaded.
 57        Filepath where the label data is downloaded.
 58    """
 59    im_dir = os.path.join(path, "images", Path(ZIP_FNAMES["images"]).stem)
 60    gt_dir = os.path.join(path, "gt", Path(ZIP_FNAMES[task]).stem)
 61
 62    if os.path.exists(im_dir) and os.path.exists(gt_dir):
 63        return im_dir, gt_dir
 64
 65    os.makedirs(path, exist_ok=True)
 66
 67    im_zip_path = os.path.join(path, ZIP_FNAMES["images"])
 68    gt_zip_path = os.path.join(path, ZIP_FNAMES[task])
 69
 70    # download the images
 71    util.download_source(path=im_zip_path, url=URL["images"], download=download, checksum=CHECKSUM["images"])
 72    util.unzip(zip_path=im_zip_path, dst=im_dir, remove=False)
 73
 74    # download the labels
 75    util.download_source(path=gt_zip_path, url=URL[task], download=download, checksum=CHECKSUM[task])
 76    util.unzip(zip_path=gt_zip_path, dst=gt_dir)
 77
 78    return im_dir, gt_dir
 79
 80
 81def get_covid19_seg_paths(
 82    path: Union[os.PathLike, str],
 83    task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None,
 84    download: bool = False
 85) -> Tuple[List[str], List[str]]:
 86    """Get paths to the Covid19Seg data.
 87
 88    Args:
 89        path: Filepath to a folder where the data is downloaded for further processing.
 90        task: The choice of labels for specific task.
 91        download: Whether to download the data if it is not present.
 92
 93    Returns:
 94        List of filepaths for the image data.
 95        List of filepaths for the label data.
 96    """
 97    if task is None:
 98        task = "lung_and_infection"
 99    else:
100        assert task in ["lung", "infection", "lung_and_infection"], f"{task} is not a valid task."
101
102    image_dir, gt_dir = get_covid19_seg_data(path, task, download)
103    image_paths = natsorted(glob(os.path.join(image_dir, "*.nii.gz")))
104    gt_paths = natsorted(glob(os.path.join(gt_dir, "*.nii.gz")))
105    return image_paths, gt_paths
106
107
def get_covid19_seg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the Covid19Seg dataset for lung and covid infection segmentation in CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        task: Which labels to use: "lung", "infection" or "lung_and_infection".
            `None` selects "lung_and_infection".
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_covid19_seg_paths(path, task, download)

    if resize_inputs:
        # The helper returns updated dataset kwargs together with the patch shape to use.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    # Images and labels are both read with the key "data".
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key="data",
        label_paths=gt_paths,
        label_key="data",
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )
    return dataset
146
147
def get_covid19_seg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None,
    download: bool = False,
    resize_inputs: bool = False,
    **kwargs
) -> DataLoader:
    """Get the Covid19Seg dataloader for lung and covid infection segmentation in CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size passed to the data loader.
        patch_shape: The patch shape to use for training.
        task: Which labels to use: "lung", "infection" or "lung_and_infection".
            `None` selects "lung_and_infection".
        download: Whether to download the data if it is not present.
        resize_inputs: Whether to resize the inputs to the patch shape.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    # Forward by keyword: the dataset factory takes `resize_inputs` BEFORE `download`,
    # so a positional `download` would land in the `resize_inputs` slot and the data
    # would never be downloaded.
    dataset = get_covid19_seg_dataset(
        path, patch_shape, task=task, resize_inputs=resize_inputs, download=download, **ds_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)

URL = {'images': 'https://zenodo.org/records/3757476/files/COVID-19-CT-Seg_20cases.zip', 'lung_and_infection': 'https://zenodo.org/records/3757476/files/Lung_and_Infection_Mask.zip', 'lung': 'https://zenodo.org/records/3757476/files/Lung_Mask.zip', 'infection': 'https://zenodo.org/records/3757476/files/Infection_Mask.zip'}

CHECKSUM = {'images': 'a5060480eff9315b069b086312dac4872777901fb80d268a5a83edd9f4e7b440', 'lung_and_infection': '34f5a573cb8fb53cb15abe81868395d9addf436854826a6fd6e70c2b294f19c3', 'lung': 'f060b0d0299939a6d95ddefdbfa281de1a779c4d230a5adbd32414711d6d8187', 'infection': '87901c73fdd2230260e61d2dbc57bf56026efc28264006b8ea2bf411453c1694'}

ZIP_FNAMES = {'images': 'COVID-19-CT-Seg_20cases.zip', 'lung_and_infection': 'Lung_and_Infection_Mask.zip', 'lung': 'Lung_Mask.zip', 'infection': 'Infection_Mask.zip'}

def get_covid19_seg_data( path: Union[os.PathLike, str], task: Literal['lung', 'infection', 'lung_and_infection'], download: bool = False) -> Tuple[str, str]: View Source

def get_covid19_seg_data(
    path: Union[os.PathLike, str],
    task: Literal['lung', 'infection', 'lung_and_infection'],
    download: bool = False
) -> Tuple[str, str]:
    """Download and extract the Covid19Seg images and the labels of one task.

    Nothing is downloaded or extracted when both target folders already exist.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        task: Which label archive to fetch; must be a key of `ZIP_FNAMES` other than "images".
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the folder the image archive is extracted to.
        Filepath of the folder the label archive is extracted to.
    """
    image_zip_name, label_zip_name = ZIP_FNAMES["images"], ZIP_FNAMES[task]

    # Each archive is extracted into a folder named after the archive (without ".zip").
    im_dir = os.path.join(path, "images", Path(image_zip_name).stem)
    gt_dir = os.path.join(path, "gt", Path(label_zip_name).stem)

    if not (os.path.exists(im_dir) and os.path.exists(gt_dir)):
        os.makedirs(path, exist_ok=True)

        # Images: the zip file is explicitly kept on disk after extraction
        # (presumably so that requesting another task does not fetch it again).
        image_zip_path = os.path.join(path, image_zip_name)
        util.download_source(
            path=image_zip_path, url=URL["images"], download=download, checksum=CHECKSUM["images"]
        )
        util.unzip(zip_path=image_zip_path, dst=im_dir, remove=False)

        # Labels of the requested task.
        label_zip_path = os.path.join(path, label_zip_name)
        util.download_source(
            path=label_zip_path, url=URL[task], download=download, checksum=CHECKSUM[task]
        )
        util.unzip(zip_path=label_zip_path, dst=gt_dir)

    return im_dir, gt_dir

Download the Covid19Seg dataset.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
task: Which labels to download: 'lung', 'infection' or 'lung_and_infection'.
download: Whether to download the data if it is not present.

Returns:

A tuple of two filepaths: the folder where the image data is downloaded, and the folder where the label data is downloaded.

def get_covid19_seg_paths( path: Union[os.PathLike, str], task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None, download: bool = False) -> Tuple[List[str], List[str]]: View Source

 82def get_covid19_seg_paths(
 83    path: Union[os.PathLike, str],
 84    task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None,
 85    download: bool = False
 86) -> Tuple[List[str], List[str]]:
 87    """Get paths to the Covid19Seg data.
 88
 89    Args:
 90        path: Filepath to a folder where the data is downloaded for further processing.
 91        task: The choice of labels for specific task.
 92        download: Whether to download the data if it is not present.
 93
 94    Returns:
 95        List of filepaths for the image data.
 96        List of filepaths for the label data.
 97    """
 98    if task is None:
 99        task = "lung_and_infection"
100    else:
101        assert task in ["lung", "infection", "lung_and_infection"], f"{task} is not a valid task."
102
103    image_dir, gt_dir = get_covid19_seg_data(path, task, download)
104    image_paths = natsorted(glob(os.path.join(image_dir, "*.nii.gz")))
105    gt_paths = natsorted(glob(os.path.join(gt_dir, "*.nii.gz")))
106    return image_paths, gt_paths

Get paths to the Covid19Seg data.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
task: The choice of labels for specific task.
download: Whether to download the data if it is not present.

Returns:

A tuple of two lists: the filepaths for the image data, and the filepaths for the label data.

def get_covid19_seg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

def get_covid19_seg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the Covid19Seg dataset for lung and covid infection segmentation in CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        task: Which labels to use: "lung", "infection" or "lung_and_infection".
            `None` selects "lung_and_infection".
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_covid19_seg_paths(path, task, download)

    if resize_inputs:
        # The helper returns updated dataset kwargs together with the patch shape to use.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    # Images and labels are both read with the key "data".
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key="data",
        label_paths=gt_paths,
        label_key="data",
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )
    return dataset

Get the Covid19Seg dataset for lung and covid infection segmentation in CT scans.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
patch_shape: The patch shape to use for training.
task: The choice of labels for specific task.
resize_inputs: Whether to resize the inputs to the patch shape.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_covid19_seg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_covid19_seg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    task: Optional[Literal['lung', 'infection', 'lung_and_infection']] = None,
    download: bool = False,
    resize_inputs: bool = False,
    **kwargs
) -> DataLoader:
    """Get the Covid19Seg dataloader for lung and covid infection segmentation in CT scans.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size passed to the data loader.
        patch_shape: The patch shape to use for training.
        task: Which labels to use: "lung", "infection" or "lung_and_infection".
            `None` selects "lung_and_infection".
        download: Whether to download the data if it is not present.
        resize_inputs: Whether to resize the inputs to the patch shape.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    # Forward by keyword: the dataset factory takes `resize_inputs` BEFORE `download`,
    # so a positional `download` would land in the `resize_inputs` slot and the data
    # would never be downloaded.
    dataset = get_covid19_seg_dataset(
        path, patch_shape, task=task, resize_inputs=resize_inputs, download=download, **ds_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)

Get the Covid19Seg dataloader for lung and covid infection segmentation in CT scans.

Arguments:

path: Filepath to a folder where the data is downloaded for further processing.
batch_size: The batch size passed to the data loader.
patch_shape: The patch shape to use for training.
task: The choice of labels for specific task.
resize_inputs: Whether to resize the inputs to the patch shape.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.