torch_em.data.datasets.medical.duke_liver
The Duke Liver dataset contains annotations for liver segmentation in MRI scans.
NOTE: This dataset is located at https://doi.org/10.5281/zenodo.7774566. Please see 'get_duke_liver_data' for instructions on downloading the dataset.
The dataset is from the publication https://doi.org/10.1148/ryai.220275. Please cite it if you use this dataset for your research.
1"""The Duke Liver dataset contains annotations for liver segmentation in MRI scans. 2 3NOTE: This dataset is located at https://doi.org/10.5281/zenodo.7774566. 4Please see 'get_duke_liver_data' for instructions on downloading the dataset. 5 6The dataset is from the publication https://doi.org/10.1148/ryai.220275. 7Please cite it if you use this dataset for your research. 8""" 9 10import os 11from glob import glob 12from tqdm import tqdm 13from natsort import natsorted 14from typing import Union, Tuple, Literal, List 15 16import numpy as np 17 18from torch.utils.data import Dataset, DataLoader 19 20import torch_em 21 22from .. import util 23 24 25def get_duke_liver_data(path: Union[os.PathLike, str], download: bool = False) -> str: 26 """Get the Duke Liver dataset. 27 28 The dataset is located at https://doi.org/10.5281/zenodo.7774566. 29 30 Follow the instructions below to get access to the dataset. 31 - Visit the zenodo site attached above. 32 - Send a request message alongwith some details to get access to the dataset. 33 - The authors would accept the request, then you can access the dataset. 34 - Next, download the `Segmentation.zip` file and provide the path where the zip file is stored. 35 36 Args: 37 path: Filepath to a folder where the data needs to be downloaded for further processing. 38 download: Whether to download the data if it is not present. 39 40 Returns: 41 Filepath where the data is preprocessed. 42 """ 43 data_dir = os.path.join(path, "data", "Segmentation") 44 if os.path.exists(data_dir): 45 return data_dir 46 47 if download: 48 raise NotImplementedError( 49 "Automatic download for Duke Liver dataset is not possible. See `get_duke_liver_data` for details." 50 ) 51 52 zip_path = os.path.join(path, "Segmentation.zip") 53 util.unzip(zip_path=zip_path, dst=os.path.join(path, "data"), remove=False) 54 55 return data_dir 56 57 58def _preprocess_data(path, data_dir): 59 preprocess_dir = os.path.join(path, "data", "preprocessed") 60 61 if os.path.exists(preprocess_dir): 62 _image_paths = natsorted(glob(os.path.join(preprocess_dir, "images", "*.nii.gz"))) 63 _gt_paths = natsorted(glob(os.path.join(preprocess_dir, "masks", "*.nii.gz"))) 64 return _image_paths, _gt_paths 65 66 os.makedirs(os.path.join(preprocess_dir, "images"), exist_ok=True) 67 os.makedirs(os.path.join(preprocess_dir, "masks"), exist_ok=True) 68 69 image_paths, gt_paths = [], [] 70 for patient_dir in tqdm(glob(os.path.join(data_dir, "00*"))): 71 patient_id = os.path.split(patient_dir)[-1] 72 73 for sub_id_dir in glob(os.path.join(patient_dir, "*")): 74 sub_id = os.path.split(sub_id_dir)[-1] 75 76 image_path = os.path.join(preprocess_dir, "images", f"{patient_id}_{sub_id}.nii.gz") 77 gt_path = os.path.join(preprocess_dir, "masks", f"{patient_id}_{sub_id}.nii.gz") 78 79 image_paths.append(image_path) 80 gt_paths.append(gt_path) 81 82 if os.path.exists(image_path) and os.path.exists(gt_path): 83 continue 84 85 image_slice_paths = natsorted(glob(os.path.join(sub_id_dir, "images", "*.dicom"))) 86 gt_slice_paths = natsorted(glob(os.path.join(sub_id_dir, "masks", "*.dicom"))) 87 88 import pydicom as dicom 89 import nibabel as nib 90 91 images, gts = [], [] 92 for image_slice_path, gt_slice_path in zip(image_slice_paths, gt_slice_paths): 93 image_slice = dicom.dcmread(image_slice_path).pixel_array 94 gt_slice = dicom.dcmread(gt_slice_path).pixel_array 95 96 images.append(image_slice) 97 gts.append(gt_slice) 98 99 image = np.stack(images).transpose(1, 2, 0) 100 gt = np.stack(gts).transpose(1, 2, 0) 101 102 assert 
image.shape == gt.shape 103 104 image = nib.Nifti2Image(image, np.eye(4)) 105 gt = nib.Nifti2Image(gt, np.eye(4)) 106 107 nib.save(image, image_path) 108 nib.save(gt, gt_path) 109 110 return natsorted(image_paths), natsorted(gt_paths) 111 112 113def get_duke_liver_paths( 114 path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False 115) -> Tuple[List[str], List[str]]: 116 """Get paths to the Duke Liver dataset. 117 118 Args: 119 path: Filepath to a folder where the data needs to be downloaded for further processing. 120 split: The choice of data split. 121 download: Whether to download the data if it is not present. 122 123 Returns: 124 List of filepaths for the image data. 125 List of filepaths for the label data. 126 """ 127 data_dir = get_duke_liver_data(path=path, download=download) 128 129 image_paths, gt_paths = _preprocess_data(path=path, data_dir=data_dir) 130 131 if split == "train": 132 image_paths, gt_paths = image_paths[:250], gt_paths[:250] 133 elif split == "val": 134 image_paths, gt_paths = image_paths[250:260], gt_paths[250:260] 135 elif split == "test": 136 image_paths, gt_paths = image_paths[260:], gt_paths[260:] 137 else: 138 raise ValueError(f"'{split}' is not a valid split.") 139 140 return image_paths, gt_paths 141 142 143def get_duke_liver_dataset( 144 path: Union[os.PathLike, str], 145 patch_shape: Tuple[int, ...], 146 split: Literal['train', 'val', 'test'], 147 resize_inputs: bool = False, 148 download: bool = False, 149 **kwargs 150) -> Dataset: 151 """Get the Duke Liver dataset for segmentation of liver in MRI. 152 153 Args: 154 path: Filepath to a folder where the data needs to be downloaded for further processing. 155 patch_shape: The patch shape to use for training. 156 split: The choice of data split. 157 resize_inputs: Whether to resize the inputs to the patch shape. 158 download: Whether to download the data if it is not present. 159 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 160 161 Returns: 162 The segmentation dataset. 163 """ 164 image_paths, gt_paths = get_duke_liver_paths(path, split, download) 165 166 if resize_inputs: 167 resize_kwargs = {"patch_shape": patch_shape, "is_rgb": False} 168 kwargs, patch_shape = util.update_kwargs_for_resize_trafo( 169 kwargs=kwargs, patch_shape=patch_shape, resize_inputs=resize_inputs, resize_kwargs=resize_kwargs 170 ) 171 172 return torch_em.default_segmentation_dataset( 173 raw_paths=image_paths, 174 raw_key="data", 175 label_paths=gt_paths, 176 label_key="data", 177 is_seg_dataset=True, 178 patch_shape=patch_shape, 179 **kwargs 180 ) 181 182 183def get_duke_liver_loader( 184 path: Union[os.PathLike, str], 185 batch_size: int, 186 patch_shape: Tuple[int, ...], 187 split: Literal['train', 'val', 'test'], 188 resize_inputs: bool = False, 189 download: bool = False, 190 **kwargs 191) -> DataLoader: 192 """Get the Duke Liver dataloader for segmentation of liver in MRI. 193 194 Args: 195 path: Filepath to a folder where the data needs to be downloaded for further processing. 196 batch_size: The batch size for training. 197 patch_shape: The patch shape to use for training. 198 split: The choice of data split. 199 resize_inputs: Whether to resize the inputs to the patch shape. 200 download: Whether to download the data if it is not present. 201 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 202 203 Returns: 204 The DataLoader. 
205 """ 206 ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) 207 dataset = get_duke_liver_dataset(path, patch_shape, split, resize_inputs, download, **ds_kwargs) 208 return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)

get_duke_liver_data(path: Union[os.PathLike, str], download: bool = False) -> str
Get the Duke Liver dataset.
The dataset is located at https://doi.org/10.5281/zenodo.7774566.
Follow the instructions below to get access to the dataset.
- Visit the Zenodo page linked above.
- Send a request message along with some details to get access to the dataset.
- Once the authors accept the request, you can access the dataset.
- Next, download the `Segmentation.zip` file and provide the path where the zip file is stored.
Arguments:
- path: Filepath to a folder where the data needs to be downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is extracted.
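
Since automatic download is not supported, a typical first step is to place the manually downloaded zip file in a folder of your choice and point the function at it. A minimal sketch, assuming the zip was saved to a hypothetical folder `/data/duke_liver`:

from torch_em.data.datasets.medical.duke_liver import get_duke_liver_data

# Folder that already contains the manually downloaded 'Segmentation.zip'
# (hypothetical location, adjust to your setup).
data_root = "/data/duke_liver"

# Unzips the archive on the first call and returns '<data_root>/data/Segmentation'.
data_dir = get_duke_liver_data(path=data_root, download=False)
print(data_dir)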

get_duke_liver_paths(path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]
Get paths to the Duke Liver dataset.
Arguments:
- path: Filepath to a folder where the data needs to be downloaded for further processing.
- split: The choice of data split.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data.
List of filepaths for the label data.
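
A short sketch of fetching the split-specific paths. Note that the first call also runs the DICOM-to-NIfTI preprocessing (it needs `pydicom` and `nibabel` installed and can take a while); the folder below is a placeholder:

from torch_em.data.datasets.medical.duke_liver import get_duke_liver_paths

image_paths, gt_paths = get_duke_liver_paths(
    path="/data/duke_liver",  # placeholder folder containing 'Segmentation.zip'
    split="train",
)
assert len(image_paths) == len(gt_paths)
print(len(image_paths), image_paths[0])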

get_duke_liver_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> Dataset
Get the Duke Liver dataset for segmentation of liver in MRI.
Arguments:
- path: Filepath to a folder where the data needs to be downloaded for further processing.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- resize_inputs: Whether to resize the inputs to the patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
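
A sketch of how the dataset could be instantiated; the path is a placeholder and the patch shape is only an example value, not prescribed by the dataset:

from torch_em.data.datasets.medical.duke_liver import get_duke_liver_dataset

dataset = get_duke_liver_dataset(
    path="/data/duke_liver",     # placeholder folder containing 'Segmentation.zip'
    patch_shape=(32, 256, 256),  # example 3D patch shape (z, y, x)
    split="train",
    resize_inputs=False,
)
print(len(dataset))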

get_duke_liver_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], split: Literal['train', 'val', 'test'], resize_inputs: bool = False, download: bool = False, **kwargs) -> DataLoader
Get the Duke Liver dataloader for segmentation of liver in MRI.
Arguments:
- path: Filepath to a folder where the data needs to be downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- resize_inputs: Whether to resize the inputs to the patch shape.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.
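
A sketch of using the loader for training; extra keyword arguments such as `shuffle` are forwarded to the PyTorch DataLoader, and the path, batch size, and patch shape are example values:

from torch_em.data.datasets.medical.duke_liver import get_duke_liver_loader

loader = get_duke_liver_loader(
    path="/data/duke_liver",     # placeholder folder containing 'Segmentation.zip'
    batch_size=2,
    patch_shape=(32, 256, 256),  # example 3D patch shape (z, y, x)
    split="train",
    shuffle=True,                # passed through to the PyTorch DataLoader
)

# Each batch is a pair of (raw, label) tensors.
x, y = next(iter(loader))
print(x.shape, y.shape)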