torch_em.data.datasets.medical.jsrt

The JSRT dataset contains annotations for lung segmentation in chest X-Rays.

The database is located at http://db.jsrt.or.jp/eng.php. This dataset is from the publication https://doi.org/10.2214/ajr.174.1.1740071. Please cite it if you use this dataset for a publication.

  1"""The JSRT dataset contains annotations for lung segmentation
  2in chest X-Rays.
  3
  4The database is located at http://db.jsrt.or.jp/eng.php
  5This dataset is from the publication https://doi.org/10.2214/ajr.174.1.1740071.
  6Please cite it if you use this dataset for a publication.
  7"""
  8
  9import os
 10from glob import glob
 11from pathlib import Path
 12from typing import Optional, Union, Tuple, Literal, List
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
 21URL = {
 22    "Segmentation01": "http://imgcom.jsrt.or.jp/imgcom/wp-content/uploads/2018/11/Segmentation01.zip",
 23    "Segmentation02": "http://imgcom.jsrt.or.jp/imgcom/wp-content/uploads/2019/07/segmentation02.zip"
 24}
 25
 26CHECKSUM = {
 27    "Segmentation01": "ab1f26a910bc18eae170928e9f2d98512cc4dc8949bf6cd38b98a93398714fcf",
 28    "Segmentation02": "f1432af4fcbd69342cf1bf2ca3d0d43b9535cdc6b160b86191b5b67de2fdbf3c"
 29}
 30
 31ZIP_PATH = {
 32    "Segmentation01": "Segmentation01.zip",
 33    "Segmentation02": "segmentation02.zip"
 34}
 35
 36DATA_DIR = {
 37    "Segmentation01": "Segmentation01",
 38    "Segmentation02": "segmentation02"
 39}
 40
 41
 42def get_jsrt_data(
 43    path: Union[os.PathLike, str], choice: Literal["Segmentation01", "Segmentation02"], download: bool = False
 44):
 45    """Download the JSRT dataset.
 46
 47    Args:
 48        path: Filepath to a folder where the data is downloaded for further processing.
 49        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
 50        download: Whether to download the data if it is not present.
 51    """
 52    data_dir = os.path.join(path, DATA_DIR[choice])
 53    if os.path.exists(data_dir):
 54        return
 55
 56    os.makedirs(path, exist_ok=True)
 57
 58    zip_path = os.path.join(path, ZIP_PATH[choice])
 59
 60    util.download_source(path=zip_path, url=URL[choice], download=download, checksum=CHECKSUM[choice])
 61    util.unzip(zip_path=zip_path, dst=path)
 62
 63
 64def get_jsrt_paths(
 65    path: Union[os.PathLike, str],
 66    split: Literal['train', 'test'],
 67    choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None,
 68    download: bool = False,
 69) -> Tuple[List[str], List[str]]:
 70    """Get paths to the JSRT data.
 71
 72    Args:
 73        path: Filepath to a folder where the data is downloaded for further processing.
 74        split: The data split to use. Either 'train', or 'test'.
 75        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
 76        download: Whether to download the data if it is not present.
 77
 78    Returns:
 79        List of filepaths for the image data.
 80        List of filepaths for the label data.
 81    """
 82    available_splits = ["train", "test"]
 83    assert split in available_splits, f"{split} isn't a valid split choice. Please choose from {available_splits}."
 84
 85    if choice is None:
 86        choice = list(URL.keys())
 87    else:
 88        if isinstance(choice, str):
 89            choice = [choice]
 90
 91    image_paths, gt_paths = [], []
 92    for per_choice in choice:
 93        get_jsrt_data(path=path, download=download, choice=per_choice)
 94
 95        if per_choice == "Segmentation01":
 96            root_dir = os.path.join(path, Path(ZIP_PATH[per_choice]).stem, split)
 97            all_image_paths = sorted(glob(os.path.join(root_dir, "org", "*.png")))
 98            all_gt_paths = sorted(glob(os.path.join(root_dir, "label", "*.png")))
 99
100        elif per_choice == "Segmentation02":
101            root_dir = os.path.join(path, Path(ZIP_PATH[per_choice]).stem, "segmentation")
102            all_image_paths = sorted(glob(os.path.join(root_dir, f"org_{split}", "*.bmp")))
103            all_gt_paths = sorted(glob(os.path.join(root_dir, f"label_{split}", "*.png")))
104
105        else:
106            raise ValueError(f"{per_choice} is not a valid segmentation dataset choice.")
107
108        image_paths.extend(all_image_paths)
109        gt_paths.extend(all_gt_paths)
110
111    assert len(image_paths) == len(gt_paths)
112
113    return image_paths, gt_paths
114
115
def get_jsrt_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the JSRT lung segmentation dataset.

    The image and label files are looked up with `get_jsrt_paths` and handed to
    `torch_em.default_segmentation_dataset`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
            Forwarded unchanged to `get_jsrt_paths`.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_jsrt_paths(path=path, split=split, choice=choice, download=download)

    if resize_inputs:
        # The helper returns both the adjusted kwargs and the patch shape to use from here on.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
150
151
def get_jsrt_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the JSRT dataloader for lung segmentation.

    `kwargs` is split with `util.split_kwargs` into arguments for the dataset and arguments for the
    loader. The dataset comes from `get_jsrt_dataset` and is wrapped by `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    jsrt_dataset = get_jsrt_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        choice=choice,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(jsrt_dataset, batch_size, **dataloader_kwargs)
# Download location of each JSRT segmentation archive, keyed by subset name.
URL = {
    "Segmentation01": "http://imgcom.jsrt.or.jp/imgcom/wp-content/uploads/2018/11/Segmentation01.zip",
    "Segmentation02": "http://imgcom.jsrt.or.jp/imgcom/wp-content/uploads/2019/07/segmentation02.zip",
}

# Checksum handed to `util.download_source` together with the matching URL.
CHECKSUM = {
    "Segmentation01": "ab1f26a910bc18eae170928e9f2d98512cc4dc8949bf6cd38b98a93398714fcf",
    "Segmentation02": "f1432af4fcbd69342cf1bf2ca3d0d43b9535cdc6b160b86191b5b67de2fdbf3c",
}

# Local file name of each archive: the last component of its URL.
ZIP_PATH = {subset: address.rsplit("/", 1)[-1] for subset, address in URL.items()}

# Folder whose presence marks a subset as already available: the archive name without its extension.
DATA_DIR = {subset: os.path.splitext(archive)[0] for subset, archive in ZIP_PATH.items()}
def get_jsrt_data( path: Union[os.PathLike, str], choice: Literal['Segmentation01', 'Segmentation02'], download: bool = False):
def get_jsrt_data(
    path: Union[os.PathLike, str], choice: Literal["Segmentation01", "Segmentation02"], download: bool = False
):
    """Make sure one subset of the JSRT dataset is available below `path`.

    Nothing is done when the subset's folder already exists. Otherwise `path` is created if needed,
    the subset's archive is requested through `util.download_source` and then unpacked into `path`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
        download: Whether to download the data if it is not present.
    """
    # An existing subset folder is treated as the result of an earlier download and extraction.
    if not os.path.exists(os.path.join(path, DATA_DIR[choice])):
        os.makedirs(path, exist_ok=True)

        archive = os.path.join(path, ZIP_PATH[choice])
        util.download_source(path=archive, url=URL[choice], download=download, checksum=CHECKSUM[choice])
        util.unzip(zip_path=archive, dst=path)

Download the JSRT dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
  • download: Whether to download the data if it is not present.
def get_jsrt_paths( path: Union[os.PathLike, str], split: Literal['train', 'test'], choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
 65def get_jsrt_paths(
 66    path: Union[os.PathLike, str],
 67    split: Literal['train', 'test'],
 68    choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None,
 69    download: bool = False,
 70) -> Tuple[List[str], List[str]]:
 71    """Get paths to the JSRT data.
 72
 73    Args:
 74        path: Filepath to a folder where the data is downloaded for further processing.
 75        split: The data split to use. Either 'train', or 'test'.
 76        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
 77        download: Whether to download the data if it is not present.
 78
 79    Returns:
 80        List of filepaths for the image data.
 81        List of filepaths for the label data.
 82    """
 83    available_splits = ["train", "test"]
 84    assert split in available_splits, f"{split} isn't a valid split choice. Please choose from {available_splits}."
 85
 86    if choice is None:
 87        choice = list(URL.keys())
 88    else:
 89        if isinstance(choice, str):
 90            choice = [choice]
 91
 92    image_paths, gt_paths = [], []
 93    for per_choice in choice:
 94        get_jsrt_data(path=path, download=download, choice=per_choice)
 95
 96        if per_choice == "Segmentation01":
 97            root_dir = os.path.join(path, Path(ZIP_PATH[per_choice]).stem, split)
 98            all_image_paths = sorted(glob(os.path.join(root_dir, "org", "*.png")))
 99            all_gt_paths = sorted(glob(os.path.join(root_dir, "label", "*.png")))
100
101        elif per_choice == "Segmentation02":
102            root_dir = os.path.join(path, Path(ZIP_PATH[per_choice]).stem, "segmentation")
103            all_image_paths = sorted(glob(os.path.join(root_dir, f"org_{split}", "*.bmp")))
104            all_gt_paths = sorted(glob(os.path.join(root_dir, f"label_{split}", "*.png")))
105
106        else:
107            raise ValueError(f"{per_choice} is not a valid segmentation dataset choice.")
108
109        image_paths.extend(all_image_paths)
110        gt_paths.extend(all_gt_paths)
111
112    assert len(image_paths) == len(gt_paths)
113
114    return image_paths, gt_paths

Get paths to the JSRT data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • split: The data split to use. Either 'train', or 'test'.
  • choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'. If None, both subsets are used.
  • download: Whether to download the data if it is not present.
Returns:

A list of filepaths for the image data and a list of filepaths for the label data.

def get_jsrt_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'test'], choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_jsrt_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the JSRT lung segmentation dataset.

    The image and label files are looked up with `get_jsrt_paths` and handed to
    `torch_em.default_segmentation_dataset`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
            Forwarded unchanged to `get_jsrt_paths`.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_jsrt_paths(path=path, split=split, choice=choice, download=download)

    if resize_inputs:
        # The helper returns both the adjusted kwargs and the patch shape to use from here on.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": False},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset

Get the JSRT dataset for lung segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train', or 'test'.
  • choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'. If None, both subsets are used.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_jsrt_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'test'], choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_jsrt_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    choice: Optional[Literal['Segmentation01', 'Segmentation02']] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the JSRT dataloader for lung segmentation.

    `kwargs` is split with `util.split_kwargs` into arguments for the dataset and arguments for the
    loader. The dataset comes from `get_jsrt_dataset` and is wrapped by `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'.
        resize_inputs: Whether to resize the inputs.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    jsrt_dataset = get_jsrt_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        choice=choice,
        resize_inputs=resize_inputs,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(jsrt_dataset, batch_size, **dataloader_kwargs)

Get the JSRT dataloader for lung segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train', or 'test'.
  • choice: The choice of data subset. Either 'Segmentation01' or 'Segmentation02'. If None, both subsets are used.
  • resize_inputs: Whether to resize the inputs.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.