torch_em.data.datasets.medical.siim_acr
"""Data access for the SIIM-ACR pneumothorax chest X-ray (CXR) segmentation dataset.

The data is fetched from the Kaggle dataset
https://www.kaggle.com/datasets/vbookshelf/pneumothorax-chest-xray-images-and-masks/data
"""

import os
from glob import glob
from typing import Union, Tuple

import torch_em
from torch_em.transform.generic import ResizeInputs

from .. import util
from ... import ImageCollectionDataset


# Identifier of the Kaggle dataset handed to `util.download_source_kaggle`.
KAGGLE_DATASET_NAME = "vbookshelf/pneumothorax-chest-xray-images-and-masks"
# Expected checksum of the downloaded zip archive, verified via `util._check_checksum`.
CHECKSUM = "1ade68d31adb996c531bb686fb9d02fe11876ddf6f25594ab725e18c69d81538"


def get_siim_acr_data(path, download):
    """Make sure the dataset is available below `path` and return its directory.

    Args:
        path: Root folder for the dataset. It is created if it does not exist.
        download: Forwarded to `util.download_source_kaggle`.

    Returns:
        The path `<path>/siim-acr-pneumothorax`. If this directory already exists,
        it is returned directly and nothing is downloaded, verified or extracted.
    """
    os.makedirs(path, exist_ok=True)

    data_dir = os.path.join(path, "siim-acr-pneumothorax")
    if os.path.exists(data_dir):
        return data_dir

    util.download_source_kaggle(path=path, dataset_name=KAGGLE_DATASET_NAME, download=download)

    zip_path = os.path.join(path, "pneumothorax-chest-xray-images-and-masks.zip")
    util._check_checksum(path=zip_path, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def _get_siim_acr_paths(path, split, download):
    """Collect the sorted image and mask file paths for one split.

    Args:
        path: Root folder for the dataset.
        split: Either "train" or "test".
        download: Forwarded to `get_siim_acr_data`.

    Returns:
        A tuple `(image_paths, gt_paths)` of sorted lists of PNG files found in
        the `png_images` and `png_masks` sub-folders whose names match `*_{split}_*.png`.

    Raises:
        AssertionError: If `split` is neither "train" nor "test".
    """
    # Validate the split first, so that an invalid value fails before any
    # download / checksum / extraction work is started.
    assert split in ["train", "test"], f"'{split}' is not a valid split."

    data_dir = get_siim_acr_data(path=path, download=download)

    image_paths = sorted(glob(os.path.join(data_dir, "png_images", f"*_{split}_*.png")))
    gt_paths = sorted(glob(os.path.join(data_dir, "png_masks", f"*_{split}_*.png")))

    return image_paths, gt_paths


def get_siim_acr_dataset(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    download: bool = False,
    resize_inputs: bool = False,
    **kwargs
):
    """Dataset for pneumothorax segmentation in CXR.

    The database is located at https://www.kaggle.com/datasets/vbookshelf/pneumothorax-chest-xray-images-and-masks/data

    This dataset is from the "SIIM-ACR Pneumothorax Segmentation" competition:
    https://kaggle.com/competitions/siim-acr-pneumothorax-segmentation

    Please cite it if you use this dataset for a publication.

    Args:
        path: Root folder for the dataset.
        split: Either "train" or "test".
        patch_shape: Patch shape passed to the dataset, or the target shape of the
            resize transforms if `resize_inputs` is set.
        download: Forwarded to the data download helper.
        resize_inputs: If True, `ResizeInputs` transforms with `patch_shape` as target shape
            are used for images and labels, and the dataset gets `patch_shape=None`.
        kwargs: Additional keyword arguments for `ImageCollectionDataset`.

    Returns:
        The `ImageCollectionDataset` built from the image and mask paths of the split.
    """
    image_paths, gt_paths = _get_siim_acr_paths(path=path, split=split, download=download)

    if resize_inputs:
        raw_trafo = ResizeInputs(target_shape=patch_shape, is_label=False)
        label_trafo = ResizeInputs(target_shape=patch_shape, is_label=True)
        # The transforms take over the target shape, so the dataset itself gets no patch shape.
        patch_shape = None
    else:
        raw_trafo, label_trafo = None, None

    dataset = ImageCollectionDataset(
        raw_image_paths=image_paths,
        label_image_paths=gt_paths,
        patch_shape=patch_shape,
        raw_transform=raw_trafo,
        label_transform=label_trafo,
        **kwargs
    )
    # NOTE(review): presumably raises the retry budget of the patch sampler - confirm against the dataset class.
    dataset.max_sampling_attempts = 5000

    return dataset


def get_siim_acr_loader(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    download: bool = False,
    resize_inputs: bool = False,
    **kwargs
):
    """Dataloader for pneumothorax segmentation in CXR. See `get_siim_acr_dataset` for details.

    Args:
        path: Root folder for the dataset.
        split: Either "train" or "test".
        patch_shape: See `get_siim_acr_dataset`.
        batch_size: Batch size passed to `torch_em.get_data_loader`.
        download: Forwarded to the data download helper.
        resize_inputs: See `get_siim_acr_dataset`.
        kwargs: Keyword arguments that `util.split_kwargs` divides between the dataset and the loader.

    Returns:
        The loader created by `torch_em.get_data_loader`.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_siim_acr_dataset(
        path=path, split=split, patch_shape=patch_shape, download=download, resize_inputs=resize_inputs, **ds_kwargs
    )
    loader = torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
    return loader
KAGGLE_DATASET_NAME =
'vbookshelf/pneumothorax-chest-xray-images-and-masks'
CHECKSUM =
'1ade68d31adb996c531bb686fb9d02fe11876ddf6f25594ab725e18c69d81538'
def
get_siim_acr_data(path, download):
def get_siim_acr_data(path, download):
    """Make sure the dataset is available below `path` and return its directory.

    Args:
        path: Root folder for the dataset. It is created if it does not exist.
        download: Forwarded to `util.download_source_kaggle`.

    Returns:
        The path `<path>/siim-acr-pneumothorax`. If this directory already exists,
        nothing is downloaded, verified or extracted.
    """
    os.makedirs(path, exist_ok=True)
    extracted_root = os.path.join(path, "siim-acr-pneumothorax")

    # Only fetch and unpack the archive when the extracted folder is missing.
    if not os.path.exists(extracted_root):
        util.download_source_kaggle(path=path, dataset_name=KAGGLE_DATASET_NAME, download=download)

        archive = os.path.join(path, "pneumothorax-chest-xray-images-and-masks.zip")
        util._check_checksum(path=archive, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return extracted_root
def
get_siim_acr_dataset( path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], download: bool = False, resize_inputs: bool = False, **kwargs):
def get_siim_acr_dataset(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    download: bool = False,
    resize_inputs: bool = False,
    **kwargs
):
    """Dataset for pneumothorax segmentation in CXR.

    The database is located at https://www.kaggle.com/datasets/vbookshelf/pneumothorax-chest-xray-images-and-masks/data

    This dataset is from the "SIIM-ACR Pneumothorax Segmentation" competition:
    https://kaggle.com/competitions/siim-acr-pneumothorax-segmentation

    Please cite it if you use this dataset for a publication.

    Args:
        path: Root folder for the dataset.
        split: The split name, forwarded to `_get_siim_acr_paths`.
        patch_shape: Patch shape passed to the dataset, or the target shape of the
            resize transforms if `resize_inputs` is set.
        download: Forwarded to `_get_siim_acr_paths`.
        resize_inputs: If True, `ResizeInputs` transforms with `patch_shape` as target shape
            are used for images and labels, and the dataset gets `patch_shape=None`.
        kwargs: Additional keyword arguments for `ImageCollectionDataset`.

    Returns:
        The `ImageCollectionDataset` built from the image and mask paths of the split.
    """
    image_paths, gt_paths = _get_siim_acr_paths(path=path, split=split, download=download)

    # Without resizing, no transforms are applied and the patch shape is passed through as is.
    raw_trafo = label_trafo = None
    if resize_inputs:
        raw_trafo = ResizeInputs(target_shape=patch_shape, is_label=False)
        label_trafo = ResizeInputs(target_shape=patch_shape, is_label=True)
        # The transforms take over the target shape, so the dataset itself gets no patch shape.
        patch_shape = None

    dataset = ImageCollectionDataset(
        raw_image_paths=image_paths,
        label_image_paths=gt_paths,
        patch_shape=patch_shape,
        raw_transform=raw_trafo,
        label_transform=label_trafo,
        **kwargs
    )
    # NOTE(review): presumably raises the retry budget of the patch sampler - confirm against the dataset class.
    dataset.max_sampling_attempts = 5000
    return dataset
Dataset for pneumothorax segmentation in chest X-ray (CXR) images.
The database is located at https://www.kaggle.com/datasets/vbookshelf/pneumothorax-chest-xray-images-and-masks/data
This dataset is from the "SIIM-ACR Pneumothorax Segmentation" competition: https://kaggle.com/competitions/siim-acr-pneumothorax-segmentation
Please cite it if you use this dataset for a publication.
def
get_siim_acr_loader( path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], batch_size: int, download: bool = False, resize_inputs: bool = False, **kwargs):
def get_siim_acr_loader(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    download: bool = False,
    resize_inputs: bool = False,
    **kwargs
):
    """Dataloader for pneumothorax segmentation in CXR. See `get_siim_acr_dataset` for details.

    Args:
        path: Root folder for the dataset.
        split: The split name, forwarded to `get_siim_acr_dataset`.
        patch_shape: Forwarded to `get_siim_acr_dataset`.
        batch_size: Batch size passed to `torch_em.get_data_loader`.
        download: Forwarded to `get_siim_acr_dataset`.
        resize_inputs: Forwarded to `get_siim_acr_dataset`.
        kwargs: Keyword arguments that `util.split_kwargs` divides between the dataset and the loader.

    Returns:
        The loader created by `torch_em.get_data_loader`.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    siim_acr_dataset = get_siim_acr_dataset(
        path=path,
        split=split,
        patch_shape=patch_shape,
        download=download,
        resize_inputs=resize_inputs,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset=siim_acr_dataset, batch_size=batch_size, **dataloader_kwargs)
Dataloader for pneumothorax segmentation in CXR. See `get_siim_acr_dataset` for details.