torch_em.data.datasets.medical.jsrt
The JSRT dataset contains annotations for lung segmentation in chest X-Rays.
The database is located at http://db.jsrt.or.jp/eng.php. This dataset is from the publication https://doi.org/10.2214/ajr.174.1.1740071. Please cite it if you use this dataset for a publication.
"""The JSRT dataset contains annotations for lung segmentation
in chest X-Rays.

The database is located at http://db.jsrt.or.jp/eng.php
This dataset is from the publication https://doi.org/10.2214/ajr.174.1.1740071.
Please cite it if you use this dataset for a publication.
"""

import os
from glob import glob
from pathlib import Path
from typing import Optional, Union, Tuple, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


# Download location of the zip archive for each data subset.
URL = {
    "Segmentation01": "http://imgcom.jsrt.or.jp/imgcom/wp-content/uploads/2018/11/Segmentation01.zip",
    "Segmentation02": "http://imgcom.jsrt.or.jp/imgcom/wp-content/uploads/2019/07/segmentation02.zip"
}

# Checksum handed to `util.download_source` for each archive.
CHECKSUM = {
    "Segmentation01": "ab1f26a910bc18eae170928e9f2d98512cc4dc8949bf6cd38b98a93398714fcf",
    "Segmentation02": "f1432af4fcbd69342cf1bf2ca3d0d43b9535cdc6b160b86191b5b67de2fdbf3c"
}

# File name under which each archive is stored inside the data folder.
ZIP_PATH = {
    "Segmentation01": "Segmentation01.zip",
    "Segmentation02": "segmentation02.zip"
}

# Folder whose presence marks a subset as already available.
DATA_DIR = {
    "Segmentation01": "Segmentation01",
    "Segmentation02": "segmentation02"
}


def get_jsrt_data(
    path: Union[os.PathLike, str], choice: Literal["Segmentation01", "Segmentation02"], download: bool = False
):
    """Download the JSRT dataset.

    Nothing is done if the data folder of the requested subset already exists.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
        download: Whether to download the data if it is not present.
    """
    data_dir = os.path.join(path, DATA_DIR[choice])
    if os.path.exists(data_dir):
        return

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, ZIP_PATH[choice])

    util.download_source(path=zip_path, url=URL[choice], download=download, checksum=CHECKSUM[choice])
    util.unzip(zip_path=zip_path, dst=path)


def get_jsrt_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'test'],
    choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None,
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the JSRT data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The data split to use. Either 'train' or 'test'.
        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
            If None, both subsets are used.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If a requested subset is not a supported choice.
    """
    available_splits = ["train", "test"]
    assert split in available_splits, f"{split} isn't a valid split choice. Please choose from {available_splits}."

    if choice is None:
        choices = list(URL)
    elif isinstance(choice, str):
        choices = [choice]
    else:
        choices = list(choice)

    # Reject unknown subsets before touching the file system or the network. Checking only after
    # `get_jsrt_data` would surface a bare KeyError from its constant lookup instead of this message.
    supported_choices = ("Segmentation01", "Segmentation02")
    for per_choice in choices:
        if per_choice not in supported_choices:
            raise ValueError(f"{per_choice} is not a valid segmentation dataset choice.")

    image_paths, gt_paths = [], []
    for per_choice in choices:
        get_jsrt_data(path=path, download=download, choice=per_choice)

        # The archives extract into a folder named after the zip file (without extension).
        subset_root = os.path.join(path, Path(ZIP_PATH[per_choice]).stem)
        if per_choice == "Segmentation01":
            image_pattern = os.path.join(subset_root, split, "org", "*.png")
            gt_pattern = os.path.join(subset_root, split, "label", "*.png")
        else:  # "Segmentation02"
            image_pattern = os.path.join(subset_root, "segmentation", f"org_{split}", "*.bmp")
            gt_pattern = os.path.join(subset_root, "segmentation", f"label_{split}", "*.png")

        subset_image_paths = sorted(glob(image_pattern))
        subset_gt_paths = sorted(glob(gt_pattern))

        # Compare per subset: with a check on the grand total only, a surplus in one subset and a
        # deficit in the other could cancel out and silently pair images with the wrong labels.
        assert len(subset_image_paths) == len(subset_gt_paths), \
            f"Found {len(subset_image_paths)} images but {len(subset_gt_paths)} labels for {per_choice}."

        image_paths.extend(subset_image_paths)
        gt_paths.extend(subset_gt_paths)

    return image_paths, gt_paths


def get_jsrt_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the JSRT dataset for lung segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train' or 'test'.
        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
            If None, both subsets are used.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_jsrt_paths(path, split, choice, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths, raw_key=None, label_paths=gt_paths, label_key=None, patch_shape=patch_shape, **kwargs
    )


def get_jsrt_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the JSRT dataloader for lung segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train' or 'test'.
        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
            If None, both subsets are used.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_jsrt_dataset(path, patch_shape, split, choice, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
{'Segmentation01': 'http://imgcom.jsrt.or.jp/imgcom/wp-content/uploads/2018/11/Segmentation01.zip', 'Segmentation02': 'http://imgcom.jsrt.or.jp/imgcom/wp-content/uploads/2019/07/segmentation02.zip'}
CHECKSUM =
{'Segmentation01': 'ab1f26a910bc18eae170928e9f2d98512cc4dc8949bf6cd38b98a93398714fcf', 'Segmentation02': 'f1432af4fcbd69342cf1bf2ca3d0d43b9535cdc6b160b86191b5b67de2fdbf3c'}
ZIP_PATH =
{'Segmentation01': 'Segmentation01.zip', 'Segmentation02': 'segmentation02.zip'}
DATA_DIR =
{'Segmentation01': 'Segmentation01', 'Segmentation02': 'segmentation02'}
def
get_jsrt_data( path: Union[os.PathLike, str], choice: Literal['Segmentation01', 'Segmentation02'], download: bool = False):
def get_jsrt_data(
    path: Union[os.PathLike, str],
    choice: Literal["Segmentation01", "Segmentation02"],
    download: bool = False,
):
    """Make sure one subset of the JSRT data is available below `path`.

    If the data folder of the subset already exists nothing happens. Otherwise the folder `path`
    is created if needed, the archive is obtained via `util.download_source` and then extracted
    into `path` via `util.unzip`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
        download: Whether to download the data if it is not present.
    """
    extracted_folder = os.path.join(path, DATA_DIR[choice])
    if not os.path.exists(extracted_folder):
        os.makedirs(path, exist_ok=True)

        archive = os.path.join(path, ZIP_PATH[choice])
        util.download_source(path=archive, url=URL[choice], download=download, checksum=CHECKSUM[choice])
        util.unzip(zip_path=archive, dst=path)
Download the JSRT dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
- download: Whether to download the data if it is not present.
def
get_jsrt_paths( path: Union[os.PathLike, str], split: Literal['train', 'test'], choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_jsrt_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'test'],
    choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None,
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the JSRT data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The data split to use. Either 'train' or 'test'.
        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
            If None, both subsets are used.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If a requested subset is not a supported choice.
    """
    available_splits = ["train", "test"]
    assert split in available_splits, f"{split} isn't a valid split choice. Please choose from {available_splits}."

    if choice is None:
        choices = list(URL)
    elif isinstance(choice, str):
        choices = [choice]
    else:
        choices = list(choice)

    # Reject unknown subsets before touching the file system or the network. Checking only after
    # `get_jsrt_data` would surface a bare KeyError from its constant lookup instead of this message.
    supported_choices = ("Segmentation01", "Segmentation02")
    for per_choice in choices:
        if per_choice not in supported_choices:
            raise ValueError(f"{per_choice} is not a valid segmentation dataset choice.")

    image_paths, gt_paths = [], []
    for per_choice in choices:
        get_jsrt_data(path=path, download=download, choice=per_choice)

        # The archives extract into a folder named after the zip file (without extension).
        subset_root = os.path.join(path, Path(ZIP_PATH[per_choice]).stem)
        if per_choice == "Segmentation01":
            image_pattern = os.path.join(subset_root, split, "org", "*.png")
            gt_pattern = os.path.join(subset_root, split, "label", "*.png")
        else:  # "Segmentation02"
            image_pattern = os.path.join(subset_root, "segmentation", f"org_{split}", "*.bmp")
            gt_pattern = os.path.join(subset_root, "segmentation", f"label_{split}", "*.png")

        subset_image_paths = sorted(glob(image_pattern))
        subset_gt_paths = sorted(glob(gt_pattern))

        # Compare per subset: with a check on the grand total only, a surplus in one subset and a
        # deficit in the other could cancel out and silently pair images with the wrong labels.
        assert len(subset_image_paths) == len(subset_gt_paths), \
            f"Found {len(subset_image_paths)} images but {len(subset_gt_paths)} labels for {per_choice}."

        image_paths.extend(subset_image_paths)
        gt_paths.extend(subset_gt_paths)

    return image_paths, gt_paths
Get paths to the JSRT data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The data split to use. Either 'train' or 'test'.
- choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'. If None, both subsets are used.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_jsrt_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'test'], choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_jsrt_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the segmentation dataset for the JSRT lung annotations.

    The image and label files are looked up with `get_jsrt_paths` and handed to
    `torch_em.default_segmentation_dataset`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_files, label_files = get_jsrt_paths(path, split, choice, download)

    if resize_inputs:
        # Let the shared helper adapt both the dataset arguments and the patch shape for resizing.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_files,
        raw_key=None,
        label_paths=label_files,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
Get the JSRT dataset for lung segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train' or 'test'.
- choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'. If None, both subsets are used.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_jsrt_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'test'], choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_jsrt_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the dataloader for the JSRT lung annotations.

    The extra keyword arguments are divided with `util.split_kwargs` into those for the dataset
    and those for the loader. The dataset comes from `get_jsrt_dataset` and is wrapped with
    `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    jsrt_dataset = get_jsrt_dataset(
        path, patch_shape, split, choice, resize_inputs, download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(jsrt_dataset, batch_size, **dataloader_kwargs)
    return loader
Get the JSRT dataloader for lung segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train' or 'test'.
- choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'. If None, both subsets are used.
- resize_inputs: Whether to resize the inputs.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.