torch_em.data.datasets.medical.siim_acr

 1import os
 2from glob import glob
 3from typing import Union, Tuple
 4
 5import torch_em
 6from torch_em.transform.generic import ResizeInputs
 7
 8from .. import util
 9from ... import ImageCollectionDataset
10
11
# Kaggle dataset identifier handed to `util.download_source_kaggle`.
KAGGLE_DATASET_NAME = "vbookshelf/pneumothorax-chest-xray-images-and-masks"
# Expected checksum of the downloaded zip archive, verified via `util._check_checksum`.
# NOTE(review): presumably a SHA-256 hex digest (64 hex characters) - confirm against `util._check_checksum`.
CHECKSUM = "1ade68d31adb996c531bb686fb9d02fe11876ddf6f25594ab725e18c69d81538"
14
15
def get_siim_acr_data(path, download):
    """Make sure the SIIM-ACR pneumothorax data is available below `path`.

    If `<path>/siim-acr-pneumothorax` already exists it is returned right away.
    Otherwise the Kaggle dataset named by `KAGGLE_DATASET_NAME` is requested via
    `util.download_source_kaggle`, the zip archive is verified against `CHECKSUM`
    and then extracted into `path`.

    Args:
        path: Directory in which the data is stored. It is created if missing.
        download: Forwarded to `util.download_source_kaggle`.

    Returns:
        The path `<path>/siim-acr-pneumothorax`.
        NOTE(review): assumes the archive extracts to this folder name; the
        path is not re-checked after unzipping.
    """
    os.makedirs(path, exist_ok=True)
    target_dir = os.path.join(path, "siim-acr-pneumothorax")

    # Only fetch and extract when the extracted folder is not there yet.
    if not os.path.exists(target_dir):
        util.download_source_kaggle(path=path, dataset_name=KAGGLE_DATASET_NAME, download=download)

        archive = os.path.join(path, "pneumothorax-chest-xray-images-and-masks.zip")
        util._check_checksum(path=archive, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return target_dir
30
31
def _get_siim_acr_paths(path, split, download):
    """Collect the image and mask file paths of one split.

    Args:
        path: Directory in which the data is stored (or downloaded to).
        split: Name of the split, either "train" or "test".
        download: Forwarded to `get_siim_acr_data`.

    Returns:
        A tuple `(image_paths, gt_paths)` of two sorted lists with the PNG files
        matching `*_{split}_*.png` in the `png_images` and `png_masks` folders
        of the data directory.

    Raises:
        ValueError: If `split` is neither "train" nor "test".
    """
    # Validate before touching the data so that a wrong split name fails fast
    # instead of after a download. An explicit raise (rather than `assert`)
    # keeps the check active when Python runs with `-O`.
    if split not in ("train", "test"):
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_siim_acr_data(path=path, download=download)

    image_paths = sorted(glob(os.path.join(data_dir, "png_images", f"*_{split}_*.png")))
    gt_paths = sorted(glob(os.path.join(data_dir, "png_masks", f"*_{split}_*.png")))

    return image_paths, gt_paths
41
42
def get_siim_acr_dataset(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    download: bool = False,
    resize_inputs: bool = False,
    **kwargs
):
    """Dataset for pneumothorax segmentation in CXR.

    The database is located at https://www.kaggle.com/datasets/vbookshelf/pneumothorax-chest-xray-images-and-masks/data

    This dataset is from the "SIIM-ACR Pneumothorax Segmentation" competition:
    https://kaggle.com/competitions/siim-acr-pneumothorax-segmentation

    Please cite it if you use this dataset for a publication.

    Args:
        path: Directory in which the data is stored (or downloaded to).
        split: Name of the split, either "train" or "test".
        patch_shape: Patch shape handed to the dataset. If `resize_inputs` is set,
            it is used as the `target_shape` of the resize transforms instead and
            the dataset receives `patch_shape=None`.
        download: Forwarded to the data retrieval step.
        resize_inputs: Whether to attach `ResizeInputs` transforms for the raw
            and label images.
        kwargs: Additional keyword arguments passed to `ImageCollectionDataset`.

    Returns:
        The `ImageCollectionDataset`, with `max_sampling_attempts` set to 5000.
    """
    raw_paths, label_paths = _get_siim_acr_paths(path=path, split=split, download=download)

    # Without resizing no transforms are attached and `patch_shape` is used as given.
    raw_transform = label_transform = None
    if resize_inputs:
        raw_transform = ResizeInputs(target_shape=patch_shape, is_label=False)
        label_transform = ResizeInputs(target_shape=patch_shape, is_label=True)
        # The transforms now carry the target shape; the dataset itself gets no patch shape.
        patch_shape = None

    ds = ImageCollectionDataset(
        raw_image_paths=raw_paths,
        label_image_paths=label_paths,
        patch_shape=patch_shape,
        raw_transform=raw_transform,
        label_transform=label_transform,
        **kwargs
    )
    ds.max_sampling_attempts = 5000

    return ds
81
82
def get_siim_acr_loader(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    download: bool = False,
    resize_inputs: bool = False,
    **kwargs
):
    """Dataloader for pneumothorax segmentation in CXR. See `get_siim_acr_dataset` for details.

    Args:
        path: Directory in which the data is stored (or downloaded to).
        split: Name of the split, forwarded to `get_siim_acr_dataset`.
        patch_shape: Patch shape, forwarded to `get_siim_acr_dataset`.
        batch_size: Batch size handed to `torch_em.get_data_loader`.
        download: Forwarded to `get_siim_acr_dataset`.
        resize_inputs: Forwarded to `get_siim_acr_dataset`.
        kwargs: Additional keyword arguments; `util.split_kwargs` divides them
            into arguments for the dataset and arguments for the data loader.

    Returns:
        The object returned by `torch_em.get_data_loader`.
    """
    # Separate dataset arguments from loader arguments, using
    # `torch_em.default_segmentation_dataset` as the reference callable.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_siim_acr_dataset(
        path=path,
        split=split,
        patch_shape=patch_shape,
        download=download,
        resize_inputs=resize_inputs,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset=ds, batch_size=batch_size, **dataloader_kwargs)
# Kaggle dataset identifier handed to `util.download_source_kaggle`.
KAGGLE_DATASET_NAME = 'vbookshelf/pneumothorax-chest-xray-images-and-masks'
# Expected checksum of the downloaded zip archive, verified via `util._check_checksum`.
# NOTE(review): presumably a SHA-256 hex digest (64 hex characters) - confirm against `util._check_checksum`.
CHECKSUM = '1ade68d31adb996c531bb686fb9d02fe11876ddf6f25594ab725e18c69d81538'
def get_siim_acr_data(path, download):
def get_siim_acr_data(path, download):
    """Make sure the SIIM-ACR pneumothorax data is available below `path`.

    If `<path>/siim-acr-pneumothorax` already exists it is returned right away.
    Otherwise the Kaggle dataset named by `KAGGLE_DATASET_NAME` is requested via
    `util.download_source_kaggle`, the zip archive is verified against `CHECKSUM`
    and then extracted into `path`.

    Args:
        path: Directory in which the data is stored. It is created if missing.
        download: Forwarded to `util.download_source_kaggle`.

    Returns:
        The path `<path>/siim-acr-pneumothorax`.
        NOTE(review): assumes the archive extracts to this folder name; the
        path is not re-checked after unzipping.
    """
    os.makedirs(path, exist_ok=True)
    target_dir = os.path.join(path, "siim-acr-pneumothorax")

    # Only fetch and extract when the extracted folder is not there yet.
    if not os.path.exists(target_dir):
        util.download_source_kaggle(path=path, dataset_name=KAGGLE_DATASET_NAME, download=download)

        archive = os.path.join(path, "pneumothorax-chest-xray-images-and-masks.zip")
        util._check_checksum(path=archive, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return target_dir
def get_siim_acr_dataset( path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], download: bool = False, resize_inputs: bool = False, **kwargs):
def get_siim_acr_dataset(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    download: bool = False,
    resize_inputs: bool = False,
    **kwargs
):
    """Dataset for pneumothorax segmentation in CXR.

    The database is located at https://www.kaggle.com/datasets/vbookshelf/pneumothorax-chest-xray-images-and-masks/data

    This dataset is from the "SIIM-ACR Pneumothorax Segmentation" competition:
    https://kaggle.com/competitions/siim-acr-pneumothorax-segmentation

    Please cite it if you use this dataset for a publication.

    Args:
        path: Directory in which the data is stored (or downloaded to).
        split: Name of the split, either "train" or "test".
        patch_shape: Patch shape handed to the dataset. If `resize_inputs` is set,
            it is used as the `target_shape` of the resize transforms instead and
            the dataset receives `patch_shape=None`.
        download: Forwarded to the data retrieval step.
        resize_inputs: Whether to attach `ResizeInputs` transforms for the raw
            and label images.
        kwargs: Additional keyword arguments passed to `ImageCollectionDataset`.

    Returns:
        The `ImageCollectionDataset`, with `max_sampling_attempts` set to 5000.
    """
    raw_paths, label_paths = _get_siim_acr_paths(path=path, split=split, download=download)

    # Without resizing no transforms are attached and `patch_shape` is used as given.
    raw_transform = label_transform = None
    if resize_inputs:
        raw_transform = ResizeInputs(target_shape=patch_shape, is_label=False)
        label_transform = ResizeInputs(target_shape=patch_shape, is_label=True)
        # The transforms now carry the target shape; the dataset itself gets no patch shape.
        patch_shape = None

    ds = ImageCollectionDataset(
        raw_image_paths=raw_paths,
        label_image_paths=label_paths,
        patch_shape=patch_shape,
        raw_transform=raw_transform,
        label_transform=label_transform,
        **kwargs
    )
    ds.max_sampling_attempts = 5000

    return ds

Dataset for pneumothorax segmentation in CXR.

The database is located at https://www.kaggle.com/datasets/vbookshelf/pneumothorax-chest-xray-images-and-masks/data

This dataset is from the "SIIM-ACR Pneumothorax Segmentation" competition: https://kaggle.com/competitions/siim-acr-pneumothorax-segmentation

Please cite it if you use this dataset for a publication.

def get_siim_acr_loader( path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], batch_size: int, download: bool = False, resize_inputs: bool = False, **kwargs):
 84def get_siim_acr_loader(
 85    path: Union[os.PathLike, str],
 86    split: str,
 87    patch_shape: Tuple[int, int],
 88    batch_size: int,
 89    download: bool = False,
 90    resize_inputs: bool = False,
 91    **kwargs
 92):
 93    """Dataloader for pneumothorax segmentation in CXR. See `get_siim_acr_dataset` for details.
 94    """
 95    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
 96    dataset = get_siim_acr_dataset(
 97        path=path, split=split, patch_shape=patch_shape, download=download, resize_inputs=resize_inputs, **ds_kwargs
 98    )
 99    loader = torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
100    return loader

Dataloader for pneumothorax segmentation in CXR. See get_siim_acr_dataset for details.