torch_em.data.datasets.medical.montgomery
The Montgomery dataset contains annotations for lung segmentation in chest x-ray images.
The database is located at https://data.lhncbc.nlm.nih.gov/public/Tuberculosis-Chest-X-ray-Datasets/Montgomery-County-CXR-Set/MontgomerySet/index.html. This dataset is from the following publications:
- https://doi.org/10.1109/TMI.2013.2284099
- https://doi.org/10.1109/tmi.2013.2290491

Please cite them if you use this dataset for your research.
"""The Montgomery dataset contains annotations for lung segmentation
in chest x-ray images.

The database is located at
https://data.lhncbc.nlm.nih.gov/public/Tuberculosis-Chest-X-ray-Datasets/Montgomery-County-CXR-Set/MontgomerySet/index.html.
This dataset is from the following publications:
- https://doi.org/10.1109/TMI.2013.2284099
- https://doi.org/10.1109/tmi.2013.2290491
Please cite them if you use this dataset for your research.
"""

import os
from glob import glob
from tqdm import tqdm
from typing import Union, Tuple, List

import imageio.v3 as imageio

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "http://openi.nlm.nih.gov/imgs/collections/NLM-MontgomeryCXRSet.zip"
CHECKSUM = "54601e952315d8f67383e9202a6e145997ade429f54f7e0af44b4e158714f424"


def get_montgomery_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the Montgomery dataset.

    Nothing is downloaded if the folder `MontgomerySet` already exists inside `path`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    data_dir = os.path.join(path, "MontgomerySet")

    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)
        zip_path = os.path.join(path, "NLM-MontgomeryCXRSet.zip")
        util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def get_montgomery_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the Montgomery data.

    The masks in `ManualMask/leftMask` and `ManualMask/rightMask` are merged into one label
    image per chest x-ray and written to `ManualMask/gt`. The merge is skipped if that folder
    already contains as many png files as there are images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_montgomery_data(path=path, download=download)
    mask_root = os.path.join(data_dir, "ManualMask")
    gt_dir = os.path.join(mask_root, "gt")

    image_paths = sorted(glob(os.path.join(data_dir, "CXR_png", "*.png")))

    # Reuse previously merged labels only if there is exactly one per image.
    # Otherwise all of them are written again below.
    if os.path.exists(gt_dir):
        gt_paths = sorted(glob(os.path.join(gt_dir, "*.png")))
        if len(image_paths) == len(gt_paths):
            return image_paths, gt_paths
    else:
        os.makedirs(gt_dir, exist_ok=True)

    lmask_dir = os.path.join(mask_root, "leftMask")
    rmask_dir = os.path.join(mask_root, "rightMask")
    gt_paths = []
    for image_path in tqdm(image_paths, desc="Merging left and right lung halves"):
        image_id = os.path.basename(image_path)

        left = imageio.imread(os.path.join(lmask_dir, image_id))
        right = imageio.imread(os.path.join(rmask_dir, image_id))

        # Merge the left and right lung halves into one gt file.
        # A bitwise OR is used instead of an in-place addition, because adding unsigned
        # integer masks wraps around (255 + 255 -> 254) or creates a new label value
        # (1 + 1 -> 2) wherever the two masks overlap. The results are identical where the
        # masks do not overlap. Assumes integer or boolean masks, as read from png files.
        gt = (left | right).astype("uint8")

        gt_path = os.path.join(gt_dir, image_id)
        imageio.imwrite(gt_path, gt)
        gt_paths.append(gt_path)

    return image_paths, gt_paths


def get_montgomery_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    resize_inputs: bool = True,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the Montgomery dataset for lung segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_montgomery_paths(path, download)

    if resize_inputs:
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )


def get_montgomery_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    resize_inputs: bool = True,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the Montgomery dataloader for lung segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size passed to `torch_em.get_data_loader`.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_montgomery_dataset(path, patch_shape, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'http://openi.nlm.nih.gov/imgs/collections/NLM-MontgomeryCXRSet.zip'
CHECKSUM =
'54601e952315d8f67383e9202a6e145997ade429f54f7e0af44b4e158714f424'
def
get_montgomery_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_montgomery_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make the Montgomery dataset available locally.

    If the folder `MontgomerySet` already exists inside `path`, it is returned right away.
    Otherwise `path` is created if needed, and the archive is fetched to
    `NLM-MontgomeryCXRSet.zip` inside `path` and unpacked into `path`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.
            The flag is forwarded to `util.download_source`.

    Returns:
        Filepath where the data is downloaded.
    """
    extracted_root = os.path.join(path, "MontgomerySet")

    if not os.path.exists(extracted_root):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "NLM-MontgomeryCXRSet.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return extracted_root
Download the Montgomery dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def
get_montgomery_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
def get_montgomery_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the Montgomery data.

    The masks in `ManualMask/leftMask` and `ManualMask/rightMask` are merged into one label
    image per chest x-ray and written to `ManualMask/gt`. The merge is skipped if that folder
    already contains as many png files as there are images.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_montgomery_data(path=path, download=download)
    mask_root = os.path.join(data_dir, "ManualMask")
    gt_dir = os.path.join(mask_root, "gt")

    image_paths = sorted(glob(os.path.join(data_dir, "CXR_png", "*.png")))

    # Reuse previously merged labels only if there is exactly one per image.
    # Otherwise all of them are written again below.
    if os.path.exists(gt_dir):
        gt_paths = sorted(glob(os.path.join(gt_dir, "*.png")))
        if len(image_paths) == len(gt_paths):
            return image_paths, gt_paths
    else:
        os.makedirs(gt_dir, exist_ok=True)

    lmask_dir = os.path.join(mask_root, "leftMask")
    rmask_dir = os.path.join(mask_root, "rightMask")
    gt_paths = []
    for image_path in tqdm(image_paths, desc="Merging left and right lung halves"):
        image_id = os.path.basename(image_path)

        left = imageio.imread(os.path.join(lmask_dir, image_id))
        right = imageio.imread(os.path.join(rmask_dir, image_id))

        # Merge the left and right lung halves into one gt file.
        # A bitwise OR is used instead of an in-place addition, because adding unsigned
        # integer masks wraps around (255 + 255 -> 254) or creates a new label value
        # (1 + 1 -> 2) wherever the two masks overlap. The results are identical where the
        # masks do not overlap. Assumes integer or boolean masks, as read from png files.
        gt = (left | right).astype("uint8")

        gt_path = os.path.join(gt_dir, image_id)
        imageio.imwrite(gt_path, gt)
        gt_paths.append(gt_path)

    return image_paths, gt_paths
Get paths to the Montgomery data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_montgomery_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], resize_inputs: bool = True, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_montgomery_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    resize_inputs: bool = True,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the Montgomery dataset for lung segmentation.

    The image and label paths come from `get_montgomery_paths`. If `resize_inputs` is set,
    `kwargs` and `patch_shape` are updated by `util.update_kwargs_for_resize_trafo`
    (called with `is_rgb=False`) before the dataset is built.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_files, label_files = get_montgomery_paths(path, download)

    if resize_inputs:
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_files,
        raw_key=None,
        label_paths=label_files,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
Get the Montgomery dataset for lung segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- resize_inputs: Whether to resize the inputs to the patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_montgomery_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, resize_inputs: bool = True, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_montgomery_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    resize_inputs: bool = True,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the Montgomery dataloader for lung segmentation.

    `kwargs` is divided by `util.split_kwargs` into the arguments for
    `torch_em.default_segmentation_dataset` and the remaining ones, which are handed
    to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size passed to `torch_em.get_data_loader`.
        resize_inputs: Whether to resize the inputs to the patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    return torch_em.get_data_loader(
        get_montgomery_dataset(path, patch_shape, resize_inputs, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs
    )
Get the Montgomery dataloader for lung segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size passed to the data loader.
- resize_inputs: Whether to resize the inputs to the patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.