torch_em.data.datasets.medical.dsad
The DSAD dataset contains annotations for abdominal organs in laparoscopy images.
This dataset is located at https://springernature.figshare.com/articles/dataset/The_Dresden_Surgical_Anatomy_Dataset_for_abdominal_organ_segmentation_in_surgical_data_science/21702600. The dataset is from the publication https://doi.org/10.1038/s41597-022-01719-2. Please cite it if you use this dataset for your research.
"""The DSAD dataset contains annotations for abdominal organs in laparoscopy images.

This dataset is located at https://springernature.figshare.com/articles/dataset/The_Dresden_Surgical_Anatomy_Dataset_for_abdominal_organ_segmentation_in_surgical_data_science/21702600 # noqa
The dataset is from the publication https://doi.org/10.1038/s41597-022-01719-2.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, List, Optional

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://springernature.figshare.com/ndownloader/files/38494425"
CHECKSUM = "b8a8ade37d106fc1641a901d1c843806f2d27f9f8e18f4614b043e7e2ca2e40f"
ORGANS = [
    "abdominal_wall", "inferior_mesenteric_artery", "liver", "pancreas", "spleen", "ureter",
    "colon", "intestinal_veins", "multilabel", "small_intestine", "stomach", "vesicular_glands"
]


def get_dsad_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the DSAD dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    data_dir = os.path.join(path, "data")

    # Nothing to do if the extracted folder is already in place.
    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)
        zip_path = os.path.join(path, "data.zip")

        print("Downloading the DSAD data. Might take several minutes depending on your internet connection.")
        util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=zip_path, dst=data_dir, remove=False)
        print("The download has finished and the data has been unzipped to a target folder.")

    return data_dir


def get_dsad_paths(
    path: Union[os.PathLike, str], organ: Optional[str] = None, download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the DSAD data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        organ: The choice of organ annotations. If None, the images of all organ folders are collected.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If `organ` is not a single valid organ name, if a mask file is missing
            or if no image is found.
    """
    data_dir = get_dsad_data(path, download)

    if organ is None:
        organ = "*"
    else:
        # The type is checked first, otherwise this message could never be reached.
        assert isinstance(organ, str), "We currently support choosing one organ at a time."
        assert organ in ORGANS, f"'{organ}' is not a valid organ choice."

    image_paths = natsorted(glob(os.path.join(data_dir, organ, "*", "image*.png")))

    # Remove multi-label inputs. Only the part of the path below 'data_dir' is inspected, so that
    # a user-chosen root folder containing "multilabel" does not discard every image.
    image_paths = [p for p in image_paths if "multilabel" not in os.path.relpath(p, data_dir)]

    # Get label paths. The substitution is restricted to the filename, so that an "image"
    # substring in any parent folder (e.g. '/home/me/images/dsad') is left untouched.
    mask_paths = [
        os.path.join(os.path.dirname(p), os.path.basename(p).replace("image", "mask")) for p in image_paths
    ]
    missing = [p for p in mask_paths if not os.path.exists(p)]
    assert not missing, f"Could not find the mask files for {len(missing)} image(s), e.g. '{missing[0]}'."

    assert image_paths and len(image_paths) == len(mask_paths), f"No valid inputs found at '{data_dir}'."

    return image_paths, mask_paths


def get_dsad_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    organ: Optional[str] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the DSAD dataset for organ segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        organ: The choice of organ annotations.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, mask_paths = get_dsad_paths(path, organ, download)

    if resize_inputs:
        resize_kwargs = {"patch_shape": patch_shape, "is_rgb": True}
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs
        )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=mask_paths,
        label_key=None,
        patch_shape=patch_shape,
        with_channels=True,
        is_seg_dataset=False,
        **kwargs
    )


def get_dsad_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    organ: Optional[str] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the DSAD dataloader for organ segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        organ: The choice of organ annotations.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The Dataloader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_dsad_dataset(path, patch_shape, organ, resize_inputs, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
URL =
'https://springernature.figshare.com/ndownloader/files/38494425'
CHECKSUM =
'b8a8ade37d106fc1641a901d1c843806f2d27f9f8e18f4614b043e7e2ca2e40f'
ORGANS =
['abdominal_wall', 'inferior_mesenteric_artery', 'liver', 'pancreas', 'spleen', 'ureter', 'colon', 'intestinal_veins', 'multilabel', 'small_intestine', 'stomach', 'vesicular_glands']
def
get_dsad_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_dsad_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Fetch and unpack the DSAD archive unless the extracted folder already exists.

    Args:
        path: Folder that holds (or will hold) the downloaded archive and the extracted data.
        download: Passed on to `util.download_source`; whether the archive may be downloaded.

    Returns:
        The `data` subfolder of `path`, i.e. the location the archive is extracted to.
    """
    data_dir = os.path.join(path, "data")

    # Downloading and extracting is only needed when the target folder is not there yet.
    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "data.zip")

        print("Downloading the DSAD data. Might take several minutes depending on your internet connection.")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        # The archive is extracted into 'data_dir'.
        util.unzip(zip_path=archive, dst=data_dir, remove=False)
        print("The download has finished and the data has been unzipped to a target folder.")

    return data_dir
Download the DSAD dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def
get_dsad_paths( path: Union[os.PathLike, str], organ: Optional[str] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_dsad_paths(
    path: Union[os.PathLike, str], organ: Optional[str] = None, download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the DSAD data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        organ: The choice of organ annotations. If None, the images of all organ folders are collected.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If `organ` is not a single valid organ name, if a mask file is missing
            or if no image is found.
    """
    data_dir = get_dsad_data(path, download)

    if organ is None:
        organ = "*"
    else:
        # The type is checked first, otherwise this message could never be reached.
        assert isinstance(organ, str), "We currently support choosing one organ at a time."
        assert organ in ORGANS, f"'{organ}' is not a valid organ choice."

    image_paths = natsorted(glob(os.path.join(data_dir, organ, "*", "image*.png")))

    # Remove multi-label inputs. Only the part of the path below 'data_dir' is inspected, so that
    # a user-chosen root folder containing "multilabel" does not discard every image.
    image_paths = [p for p in image_paths if "multilabel" not in os.path.relpath(p, data_dir)]

    # Get label paths. The substitution is restricted to the filename, so that an "image"
    # substring in any parent folder (e.g. '/home/me/images/dsad') is left untouched.
    mask_paths = [
        os.path.join(os.path.dirname(p), os.path.basename(p).replace("image", "mask")) for p in image_paths
    ]
    missing = [p for p in mask_paths if not os.path.exists(p)]
    assert not missing, f"Could not find the mask files for {len(missing)} image(s), e.g. '{missing[0]}'."

    assert image_paths and len(image_paths) == len(mask_paths), f"No valid inputs found at '{data_dir}'."

    return image_paths, mask_paths
Get paths to the DSAD data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- organ: The choice of organ annotations.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_dsad_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], organ: Optional[str] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_dsad_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    organ: Optional[str] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the DSAD segmentation dataset from the image / mask file pairs.

    Args:
        path: Folder where the data is stored (and downloaded to, if requested).
        patch_shape: The patch shape to use for training.
        organ: The organ whose annotations are used.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_dsad_paths(path, organ, download)

    if resize_inputs:
        # The images are flagged as RGB for the resize transformation.
        kwargs, patch_shape = util.update_kwargs_for_resize_trafo(
            kwargs=kwargs,
            patch_shape=patch_shape,
            resize_inputs=resize_inputs,
            resize_kwargs={"patch_shape": patch_shape, "is_rgb": True},
        )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        with_channels=True,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset
Get the DSAD dataset for organ segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- organ: The choice of organ annotations.
- resize_inputs: Whether to resize the inputs to the expected patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_dsad_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], organ: Optional[str] = None, resize_inputs: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_dsad_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    organ: Optional[str] = None,
    resize_inputs: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the DSAD dataloader for organ segmentation.

    Args:
        path: Folder where the data is stored (and downloaded to, if requested).
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        organ: The organ whose annotations are used.
        resize_inputs: Whether to resize the inputs to the expected patch shape.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The Dataloader.
    """
    # Separate the dataset arguments from everything else, which goes to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dsad_dataset = get_dsad_dataset(path, patch_shape, organ, resize_inputs, download, **dataset_kwargs)

    loader = torch_em.get_data_loader(dataset=dsad_dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader
Get the DSAD dataloader for organ segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- organ: The choice of organ annotations.
- resize_inputs: Whether to resize the inputs to the expected patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The Dataloader.