torch_em.data.datasets.electron_microscopy.uro_cell
The UroCell dataset contains segmentation annotations for the following organelles:
- Food Vacuoles
- Golgi Apparatus
- Lysosomes
- Mitochondria
It contains several FIB-SEM volumes with annotations. This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2020.103693. Please cite it if you use this dataset for a publication.
"""The UroCell dataset contains segmentation annotations for the following organelles:
- Food Vacuoles
- Golgi Apparatus
- Lysosomes
- Mitochondria

It contains several FIB-SEM volumes with annotations.
This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2020.103693.
Please cite it if you use this dataset for a publication.
"""

import os
import warnings
from glob import glob
from shutil import rmtree
from typing import List, Optional, Union, Tuple

import h5py
import torch_em
from torch.utils.data import Dataset, DataLoader
from .. import util


URL = "https://github.com/MancaZerovnikMekuc/UroCell/archive/refs/heads/master.zip"
CHECKSUM = "a48cf31b06114d7def642742b4fcbe76103483c069122abe10f377d71a1acabc"

# Organelle name -> sub-folder of the unpacked repository that holds its label volumes.
_LABEL_FOLDERS = (("fv", "instance"), ("golgi", "precise"), ("lyso", "instance"), ("mito", "instance"))


def get_urocell_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the UroCell training data and convert it to HDF5.

    Every raw volume matching `data/*.nii.gz` in the unpacked repository is written to an
    `.h5` file of the same base name directly inside `path`, with the image stored under
    the key `raw`. For each organelle that has a label volume with the same file name,
    the labels are stored as `uint32` under `labels/<organelle>`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.

    Raises:
        RuntimeError: If the nibabel package is not installed.
    """
    # An existing folder is treated as an already prepared dataset.
    if os.path.exists(path):
        return path

    try:
        import nibabel as nib
    except ImportError as err:
        raise RuntimeError("Please install the nibabel package.") from err

    os.makedirs(path)
    try:
        # Download and unzip the data.
        tmp_path = os.path.join(path, "uro_cell.zip")
        util.download_source(tmp_path, URL, download, checksum=CHECKSUM)
        util.unzip(tmp_path, path, remove=True)

        root = os.path.join(path, "UroCell-master")
        for data_path in sorted(glob(os.path.join(root, "data", "*.nii.gz"))):
            fname = os.path.basename(data_path)
            data = nib.load(data_path).get_fdata()

            # Only swap the trailing extension; the glob guarantees the 'nii.gz' suffix.
            out_path = os.path.join(path, fname[:-len("nii.gz")] + "h5")
            with h5py.File(out_path, "w") as f:
                f.create_dataset("raw", data=data, compression="gzip")

                # Check if we have any of the organelle labels for this volume
                # and also copy them if yes.
                for organelle, folder in _LABEL_FOLDERS:
                    label_path = os.path.join(root, organelle, folder, fname)
                    if not os.path.exists(label_path):
                        continue
                    labels = nib.load(label_path).get_fdata().astype("uint32")
                    assert labels.shape == data.shape, \
                        f"Shape mismatch between raw data and {organelle} labels for {fname}"
                    f.create_dataset(f"labels/{organelle}", data=labels, compression="gzip")

        # Clean Up.
        rmtree(root)
    except BaseException:
        # The folder was created by this call. Remove it again so that a failed or interrupted
        # run is not mistaken for a complete dataset by the existence check above.
        rmtree(path, ignore_errors=True)
        raise

    return path


def _get_paths(path, target):
    """Return the sorted `.h5` files in `path` that contain labels for `target`, and the label key."""
    label_key = f"labels/{target}"
    paths = []
    for file_path in sorted(glob(os.path.join(path, "*.h5"))):
        # Use a context manager so that the file handle is closed right after the check.
        with h5py.File(file_path, "r") as f:
            if label_key in f:
                paths.append(file_path)
    return paths, label_key


def get_uro_cell_dataset(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> Dataset:
    """Get the UroCell dataset for organelle segmentation in FIB-SEM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert target in ("fv", "golgi", "lyso", "mito")
    get_urocell_data(path, download)
    paths, label_key = _get_paths(path, target)

    # At most one of the three target options may be selected.
    assert sum((offsets is not None, boundaries, binary)) <= 1, f"{offsets}, {boundaries}, {binary}"
    if offsets is not None:
        if target in ("lyso", "golgi"):
            warnings.warn(
                f"{target} does not have instance labels, affinities will be computed based on binary segmentation."
            )
        # we add a binary target channel for foreground background segmentation
        label_transform = torch_em.transform.label.AffinityTransform(
            offsets=offsets, ignore_label=None, add_binary_target=True, add_mask=True
        )
        msg = "Offsets are passed, but 'label_transform2' is in the kwargs. It will be over-ridden."
        kwargs = util.update_kwargs(kwargs, 'label_transform2', label_transform, msg=msg)
    elif boundaries:
        if target in ("lyso", "golgi"):
            warnings.warn(
                f"{target} does not have instance labels, boundaries will be computed based on binary segmentation."
            )
        label_transform = torch_em.transform.label.BoundaryTransform(add_binary_target=True)
        msg = "Boundaries is set to true, but 'label_transform' is in the kwargs. It will be over-ridden."
        kwargs = util.update_kwargs(kwargs, 'label_transform', label_transform, msg=msg)
    elif binary:
        label_transform = torch_em.transform.label.labels_to_binary
        msg = "Binary is set to true, but 'label_transform' is in the kwargs. It will be over-ridden."
        kwargs = util.update_kwargs(kwargs, 'label_transform', label_transform, msg=msg)

    raw_key = "raw"
    # Raw data and labels live in the same files, hence `paths` is passed twice.
    return torch_em.default_segmentation_dataset(
        paths, raw_key, paths, label_key, patch_shape, is_seg_dataset=True, **kwargs
    )


def get_uro_cell_loader(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Get the UroCell dataloader for organelle segmentation in FIB-SEM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    ds = get_uro_cell_dataset(
        path, target, patch_shape, download=download, offsets=offsets, boundaries=boundaries, binary=binary, **ds_kwargs
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
URL =
'https://github.com/MancaZerovnikMekuc/UroCell/archive/refs/heads/master.zip'
CHECKSUM =
'a48cf31b06114d7def642742b4fcbe76103483c069122abe10f377d71a1acabc'
def
get_urocell_data(path: Union[os.PathLike, str], download: bool) -> str:
def get_urocell_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the UroCell training data and convert it to HDF5.

    Every raw volume matching `data/*.nii.gz` in the unpacked repository is written to an
    `.h5` file of the same base name directly inside `path`, with the image stored under
    the key `raw`. For each organelle that has a label volume with the same file name,
    the labels are stored as `uint32` under `labels/<organelle>`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.

    Raises:
        RuntimeError: If the nibabel package is not installed.
    """
    # An existing folder is treated as an already prepared dataset.
    if os.path.exists(path):
        return path

    try:
        import nibabel as nib
    except ImportError as err:
        raise RuntimeError("Please install the nibabel package.") from err

    # Organelle name -> sub-folder of the unpacked repository that holds its label volumes.
    label_folders = (("fv", "instance"), ("golgi", "precise"), ("lyso", "instance"), ("mito", "instance"))

    os.makedirs(path)
    try:
        # Download and unzip the data.
        tmp_path = os.path.join(path, "uro_cell.zip")
        util.download_source(tmp_path, URL, download, checksum=CHECKSUM)
        util.unzip(tmp_path, path, remove=True)

        root = os.path.join(path, "UroCell-master")
        for data_path in sorted(glob(os.path.join(root, "data", "*.nii.gz"))):
            fname = os.path.basename(data_path)
            data = nib.load(data_path).get_fdata()

            # Only swap the trailing extension; the glob guarantees the 'nii.gz' suffix.
            out_path = os.path.join(path, fname[:-len("nii.gz")] + "h5")
            with h5py.File(out_path, "w") as f:
                f.create_dataset("raw", data=data, compression="gzip")

                # Check if we have any of the organelle labels for this volume
                # and also copy them if yes.
                for organelle, folder in label_folders:
                    label_path = os.path.join(root, organelle, folder, fname)
                    if not os.path.exists(label_path):
                        continue
                    labels = nib.load(label_path).get_fdata().astype("uint32")
                    assert labels.shape == data.shape, \
                        f"Shape mismatch between raw data and {organelle} labels for {fname}"
                    f.create_dataset(f"labels/{organelle}", data=labels, compression="gzip")

        # Clean Up.
        rmtree(root)
    except BaseException:
        # The folder was created by this call. Remove it again so that a failed or interrupted
        # run is not mistaken for a complete dataset by the existence check above.
        rmtree(path, ignore_errors=True)
        raise

    return path
Download the UroCell training data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The path to the downloaded data.
def
get_uro_cell_dataset( path: Union[os.PathLike, str], target: str, patch_shape: Tuple[int, int, int], download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_uro_cell_dataset(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> Dataset:
    """Build the UroCell segmentation dataset for one organelle in FIB-SEM volumes.

    The data is prepared via `get_urocell_data`, and only volumes that contain labels for
    `target` are used. At most one of `offsets`, `boundaries` and `binary` may be set;
    the selected option installs the matching label transform in `kwargs`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert target in ("fv", "golgi", "lyso", "mito")
    get_urocell_data(path, download)
    volume_paths, label_key = _get_paths(path, target)

    # The three target options are mutually exclusive.
    assert sum((offsets is not None, boundaries, binary)) <= 1, f"{offsets}, {boundaries}, {binary}"

    # For these organelles the user is warned that the targets derive from a binary segmentation.
    binary_labels_only = target in ("lyso", "golgi")

    if offsets is not None:
        if binary_labels_only:
            warnings.warn(
                f"{target} does not have instance labels, affinities will be computed based on binary segmentation."
            )
        # A binary foreground/background channel is added to the affinity target.
        affinity_transform = torch_em.transform.label.AffinityTransform(
            offsets=offsets, ignore_label=None, add_binary_target=True, add_mask=True
        )
        kwargs = util.update_kwargs(
            kwargs,
            "label_transform2",
            affinity_transform,
            msg="Offsets are passed, but 'label_transform2' is in the kwargs. It will be over-ridden.",
        )
    elif boundaries:
        if binary_labels_only:
            warnings.warn(
                f"{target} does not have instance labels, boundaries will be computed based on binary segmentation."
            )
        boundary_transform = torch_em.transform.label.BoundaryTransform(add_binary_target=True)
        kwargs = util.update_kwargs(
            kwargs,
            "label_transform",
            boundary_transform,
            msg="Boundaries is set to true, but 'label_transform' is in the kwargs. It will be over-ridden.",
        )
    elif binary:
        kwargs = util.update_kwargs(
            kwargs,
            "label_transform",
            torch_em.transform.label.labels_to_binary,
            msg="Binary is set to true, but 'label_transform' is in the kwargs. It will be over-ridden.",
        )

    # Raw data and labels are stored in the same files, so the same paths serve both roles.
    dataset = torch_em.default_segmentation_dataset(
        volume_paths, "raw", volume_paths, label_key, patch_shape, is_seg_dataset=True, **kwargs
    )
    return dataset
Get the UroCell dataset for organelle segmentation in FIB-SEM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- target: The segmentation target, corresponding to the organelle to segment. Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to return a binary segmentation target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_uro_cell_loader( path: Union[os.PathLike, str], target: str, patch_shape: Tuple[int, int, int], batch_size: int, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_uro_cell_loader(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Build a PyTorch DataLoader over the UroCell organelle segmentation dataset.

    The keyword arguments are split into those accepted by
    `torch_em.default_segmentation_dataset` and the rest, which is passed on to
    `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    # Options that select the training target are forwarded unchanged to the dataset factory.
    target_options = dict(download=download, offsets=offsets, boundaries=boundaries, binary=binary)
    dataset = get_uro_cell_dataset(path, target, patch_shape, **target_options, **dataset_kwargs)

    loader = torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader
Get the UroCell dataloader for organelle segmentation in FIB-SEM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- target: The segmentation target, corresponding to the organelle to segment. Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to return a binary segmentation target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.