torch_em.data.datasets.electron_microscopy.uro_cell
The UroCell dataset contains segmentation annotations for the following organelles:
- Food Vacuoles
- Golgi Apparatus
- Lysosomes
- Mitochondria

It contains several FIB-SEM volumes with annotations.
This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2020.103693. Please cite it if you use this dataset for a publication.
"""The UroCell dataset contains segmentation annotations for the following organelles:
- Food Vacuoles
- Golgi Apparatus
- Lysosomes
- Mitochondria
It contains several FIB-SEM volumes with annotations.

This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2020.103693.
Please cite it if you use this dataset for a publication.
"""

import os
import warnings
from glob import glob
from shutil import rmtree
from typing import List, Optional, Union, Tuple

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://github.com/MancaZerovnikMekuc/UroCell/archive/refs/heads/master.zip"
CHECKSUM = "a48cf31b06114d7def642742b4fcbe76103483c069122abe10f377d71a1acabc"


def get_uro_cell_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the UroCell training data.

    The downloaded archive is unpacked and every raw volume is converted to an HDF5 file
    with the dataset "raw" and, for each organelle that has an annotation file for the
    volume, a dataset "labels/<organelle>".

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.

    Raises:
        RuntimeError: If the nibabel package is not installed.
    """
    # An existing folder is treated as an already prepared dataset.
    if os.path.exists(path):
        return path

    # These packages are only needed for the conversion below.
    import h5py

    try:
        import nibabel as nib
    except ImportError as e:
        raise RuntimeError("Please install the nibabel package.") from e

    # Sub-folders of the extracted repository that hold the labels of each organelle.
    label_folders = {
        "fv": ("fv", "instance"),
        "golgi": ("golgi", "precise"),
        "lyso": ("lyso", "instance"),
        "mito": ("mito", "instance"),
    }

    os.makedirs(path)
    try:
        # Download and unzip the data.
        tmp_path = os.path.join(path, "uro_cell.zip")
        util.download_source(tmp_path, URL, download, checksum=CHECKSUM)
        util.unzip(tmp_path, path, remove=True)

        root = os.path.join(path, "UroCell-master")

        for data_path in sorted(glob(os.path.join(root, "data", "*.nii.gz"))):
            fname = os.path.basename(data_path)
            data = nib.load(data_path).get_fdata()

            out_path = os.path.join(path, fname.replace("nii.gz", "h5"))
            with h5py.File(out_path, "w") as f:
                f.create_dataset("raw", data=data, compression="gzip")

                # Check if we have any of the organelle labels for this volume
                # and also copy them if yes.
                for name, sub_folders in label_folders.items():
                    label_path = os.path.join(root, *sub_folders, fname)
                    if not os.path.exists(label_path):
                        continue
                    labels = nib.load(label_path).get_fdata().astype("uint32")
                    assert labels.shape == data.shape
                    f.create_dataset(f"labels/{name}", data=labels, compression="gzip")

        # Clean Up.
        rmtree(root)
    except BaseException:
        # The folder did not exist before this call. Remove it again so that a failed
        # or interrupted run is not mistaken for a complete dataset by the check above.
        rmtree(path, ignore_errors=True)
        raise

    return path


def get_uro_cell_paths(
    path: Union[os.PathLike, str], target: str, download: bool = False, return_label_key: bool = False,
) -> Union[List[str], Tuple[List[str], str]]:
    """Get paths to the UroCell data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        download: Whether to download the data if it is not present.
        return_label_key: Whether to return the label key.

    Returns:
        List of filepaths to the stored data. If `return_label_key` is True,
        a tuple of this list and the label key is returned instead.
    """
    import h5py

    get_uro_cell_data(path, download)

    label_key = f"labels/{target}"

    # Only keep the volumes that contain labels for the requested organelle.
    paths = []
    for file_path in sorted(glob(os.path.join(path, "*.h5"))):
        # Open each file in a context manager so that the handle is closed again.
        with h5py.File(file_path, "r") as f:
            if label_key in f:
                paths.append(file_path)

    if return_label_key:
        return paths, label_key
    return paths


def get_uro_cell_dataset(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> Dataset:
    """Get the UroCell dataset for organelle segmentation in FIB-SEM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert target in ("fv", "golgi", "lyso", "mito")

    paths, label_key = get_uro_cell_paths(path, target, download, return_label_key=True)

    # At most one of the three target options may be selected.
    assert sum((offsets is not None, boundaries, binary)) <= 1, f"{offsets}, {boundaries}, {binary}"
    if offsets is not None:
        if target in ("lyso", "golgi"):
            warnings.warn(
                f"{target} does not have instance labels, affinities will be computed based on binary segmentation."
            )
        # we add a binary target channel for foreground background segmentation
        label_transform = torch_em.transform.label.AffinityTransform(
            offsets=offsets, ignore_label=None, add_binary_target=True, add_mask=True
        )
        msg = "Offsets are passed, but 'label_transform2' is in the kwargs. It will be over-ridden."
        kwargs = util.update_kwargs(kwargs, 'label_transform2', label_transform, msg=msg)
    elif boundaries:
        if target in ("lyso", "golgi"):
            warnings.warn(
                f"{target} does not have instance labels, boundaries will be computed based on binary segmentation."
            )
        label_transform = torch_em.transform.label.BoundaryTransform(add_binary_target=True)
        msg = "Boundaries is set to true, but 'label_transform' is in the kwargs. It will be over-ridden."
        kwargs = util.update_kwargs(kwargs, 'label_transform', label_transform, msg=msg)
    elif binary:
        label_transform = torch_em.transform.label.labels_to_binary
        msg = "Binary is set to true, but 'label_transform' is in the kwargs. It will be over-ridden."
        kwargs = util.update_kwargs(kwargs, 'label_transform', label_transform, msg=msg)

    return torch_em.default_segmentation_dataset(
        raw_paths=paths,
        raw_key="raw",
        label_paths=paths,
        label_key=label_key,
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )


def get_uro_cell_loader(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Get the UroCell dataloader for organelle segmentation in FIB-SEM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the ones for the data loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_uro_cell_dataset(
        path, target, patch_shape, download=download, offsets=offsets, boundaries=boundaries, binary=binary, **ds_kwargs
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
URL =
'https://github.com/MancaZerovnikMekuc/UroCell/archive/refs/heads/master.zip'
CHECKSUM =
'a48cf31b06114d7def642742b4fcbe76103483c069122abe10f377d71a1acabc'
def
get_uro_cell_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_uro_cell_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the UroCell training data.

    The downloaded archive is unpacked and every raw volume is converted to an HDF5 file
    with the dataset "raw" and, for each organelle that has an annotation file for the
    volume, a dataset "labels/<organelle>".

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.

    Raises:
        RuntimeError: If the nibabel package is not installed.
    """
    # An existing folder is treated as an already prepared dataset.
    if os.path.exists(path):
        return path

    # These packages are only needed for the conversion below.
    import h5py

    try:
        import nibabel as nib
    except ImportError as e:
        raise RuntimeError("Please install the nibabel package.") from e

    # Sub-folders of the extracted repository that hold the labels of each organelle.
    label_folders = {
        "fv": ("fv", "instance"),
        "golgi": ("golgi", "precise"),
        "lyso": ("lyso", "instance"),
        "mito": ("mito", "instance"),
    }

    os.makedirs(path)
    try:
        # Download and unzip the data.
        tmp_path = os.path.join(path, "uro_cell.zip")
        util.download_source(tmp_path, URL, download, checksum=CHECKSUM)
        util.unzip(tmp_path, path, remove=True)

        root = os.path.join(path, "UroCell-master")

        for data_path in sorted(glob(os.path.join(root, "data", "*.nii.gz"))):
            fname = os.path.basename(data_path)
            data = nib.load(data_path).get_fdata()

            out_path = os.path.join(path, fname.replace("nii.gz", "h5"))
            with h5py.File(out_path, "w") as f:
                f.create_dataset("raw", data=data, compression="gzip")

                # Check if we have any of the organelle labels for this volume
                # and also copy them if yes.
                for name, sub_folders in label_folders.items():
                    label_path = os.path.join(root, *sub_folders, fname)
                    if not os.path.exists(label_path):
                        continue
                    labels = nib.load(label_path).get_fdata().astype("uint32")
                    assert labels.shape == data.shape
                    f.create_dataset(f"labels/{name}", data=labels, compression="gzip")

        # Clean Up.
        rmtree(root)
    except BaseException:
        # The folder did not exist before this call. Remove it again so that a failed
        # or interrupted run is not mistaken for a complete dataset by the check above.
        rmtree(path, ignore_errors=True)
        raise

    return path
Download the UroCell training data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The path to the downloaded data.
def
get_uro_cell_paths( path: os.PathLike, target: str, download: bool = False, return_label_key: bool = False) -> List[str]:
def get_uro_cell_paths(
    path: Union[os.PathLike, str], target: str, download: bool = False, return_label_key: bool = False,
) -> Union[List[str], Tuple[List[str], str]]:
    """Get paths to the UroCell data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        download: Whether to download the data if it is not present.
        return_label_key: Whether to return the label key.

    Returns:
        List of filepaths to the stored data. If `return_label_key` is True,
        a tuple of this list and the label key is returned instead.
    """
    import h5py

    get_uro_cell_data(path, download)

    label_key = f"labels/{target}"

    # Only keep the volumes that contain labels for the requested organelle.
    paths = []
    for file_path in sorted(glob(os.path.join(path, "*.h5"))):
        # Open each file in a context manager so that the handle is closed again.
        with h5py.File(file_path, "r") as f:
            if label_key in f:
                paths.append(file_path)

    if return_label_key:
        return paths, label_key
    return paths
Get paths to the UroCell data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- target: The segmentation target, corresponding to the organelle to segment. Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
- download: Whether to download the data if it is not present.
- return_label_key: Whether to return the label key.
Returns:
List of filepaths to the stored data. If return_label_key is True, a tuple of this list and the label key is returned instead.
def
get_uro_cell_dataset( path: Union[os.PathLike, str], target: str, patch_shape: Tuple[int, int, int], download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_uro_cell_dataset(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> Dataset:
    """Create the UroCell dataset for segmenting organelles in FIB-SEM volumes.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The organelle to segment. One of 'fv', 'golgi', 'lyso' or 'mito'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert target in ("fv", "golgi", "lyso", "mito")

    volume_paths, label_key = get_uro_cell_paths(path, target, download, return_label_key=True)

    # The three target options are mutually exclusive.
    use_affinities = offsets is not None
    assert sum((use_affinities, boundaries, binary)) <= 1, f"{offsets}, {boundaries}, {binary}"

    # For these organelles a warning is issued when an instance-based target is requested.
    warn_no_instances = target in ("lyso", "golgi")

    if use_affinities:
        if warn_no_instances:
            warnings.warn(
                f"{target} does not have instance labels, affinities will be computed based on binary segmentation."
            )
        # The affinity target gets an extra binary channel for foreground / background.
        affinity_trafo = torch_em.transform.label.AffinityTransform(
            offsets=offsets, ignore_label=None, add_binary_target=True, add_mask=True
        )
        kwargs = util.update_kwargs(
            kwargs,
            'label_transform2',
            affinity_trafo,
            msg="Offsets are passed, but 'label_transform2' is in the kwargs. It will be over-ridden.",
        )
    elif boundaries:
        if warn_no_instances:
            warnings.warn(
                f"{target} does not have instance labels, boundaries will be computed based on binary segmentation."
            )
        boundary_trafo = torch_em.transform.label.BoundaryTransform(add_binary_target=True)
        kwargs = util.update_kwargs(
            kwargs,
            'label_transform',
            boundary_trafo,
            msg="Boundaries is set to true, but 'label_transform' is in the kwargs. It will be over-ridden.",
        )
    elif binary:
        binary_trafo = torch_em.transform.label.labels_to_binary
        kwargs = util.update_kwargs(
            kwargs,
            'label_transform',
            binary_trafo,
            msg="Binary is set to true, but 'label_transform' is in the kwargs. It will be over-ridden.",
        )

    # Raw data and labels are stored in the same files, under different keys.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=volume_paths,
        raw_key="raw",
        label_paths=volume_paths,
        label_key=label_key,
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )
    return dataset
Get the UroCell dataset for organelle segmentation in FIB-SEM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- target: The segmentation target, corresponding to the organelle to segment. Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to return a binary segmentation target.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_uro_cell_loader( path: Union[os.PathLike, str], target: str, patch_shape: Tuple[int, int, int], batch_size: int, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_uro_cell_loader(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Create the UroCell dataloader for segmenting organelles in FIB-SEM volumes.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The organelle to segment. One of 'fv', 'golgi', 'lyso' or 'mito'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Split the extra arguments into those for the dataset and those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_uro_cell_dataset(
        path,
        target,
        patch_shape,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs
    )

    loader = torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader
Get the UroCell dataloader for organelle segmentation in FIB-SEM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- target: The segmentation target, corresponding to the organelle to segment. Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to return a binary segmentation target.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.