torch_em.data.datasets.electron_microscopy.vnc
The VNC dataset contains segmentation annotations for mitochondria in EM.
It contains two volumes from TEM of the drosophila brain. Please cite https://doi.org/10.6084/m9.figshare.856713.v1 if you use this dataset in your publication.
"""The VNC dataset contains segmentation annotations for mitochondria in EM.

It contains two volumes from TEM of the drosophila brain.
Please cite https://doi.org/10.6084/m9.figshare.856713.v1 if you use this dataset in your publication.
"""

import os
from glob import glob
from shutil import rmtree
from typing import List, Optional, Union, Tuple

import imageio
import h5py
import numpy as np
import torch_em
from skimage.measure import label
from torch.utils.data import Dataset, DataLoader

from .. import util

URL = "https://github.com/unidesigner/groundtruth-drosophila-vnc/archive/refs/heads/master.zip"
CHECKSUM = "f7bd0db03c86b64440a16b60360ad60c0a4411f89e2c021c7ee2c8d6af3d7e86"


def _create_volume(f, key, pattern, process=None):
    """Stack the images matching `pattern` into one array and store it in `f` under `key`.

    Args:
        f: The open, writable h5py file (or group) that receives the dataset.
        key: The name of the dataset to create.
        pattern: The glob pattern that selects the image files for the volume.
        process: Optional callable that is applied to the stacked array before it is written.

    Raises:
        ValueError: If no file matches `pattern`.
    """
    # glob returns files in arbitrary order; sort so that the slices are stacked by file name.
    images = sorted(glob(pattern))
    if not images:
        # np.concatenate would raise a ValueError for an empty list as well,
        # but without any hint about which files are missing.
        raise ValueError(f"No images found for the pattern {pattern}")
    data = np.concatenate([imageio.imread(im)[None] for im in images], axis=0)
    if process is not None:
        data = process(data)
    f.create_dataset(key, data=data, compression="gzip")


def get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the VNC training data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.
    """

    train_path = os.path.join(path, "vnc_train.h5")
    test_path = os.path.join(path, "vnc_test.h5")
    # Nothing to do if both converted volumes are already there.
    if os.path.exists(train_path) and os.path.exists(test_path):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "vnc.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    util.unzip(zip_path, path, remove=True)

    root = os.path.join(path, "groundtruth-drosophila-vnc-master")
    assert os.path.exists(root)

    with h5py.File(train_path, "w") as f:
        _create_volume(f, "raw", os.path.join(root, "stack1", "raw", "*.tif"))
        _create_volume(f, "labels/mitochondria", os.path.join(root, "stack1", "mitochondria", "*.png"), process=label)
        _create_volume(f, "labels/synapses", os.path.join(root, "stack1", "synapses", "*.png"), process=label)
        # TODO find the post-processing to go from neuron labels to membrane labels
        # _create_volume(f, "labels/neurons", os.path.join(root, "stack1", "membranes", "*.png"))

    with h5py.File(test_path, "w") as f:
        _create_volume(f, "raw", os.path.join(root, "stack2", "raw", "*.tif"))

    # The extracted repository is not needed once the data has been converted.
    rmtree(root)
    return path


def get_vnc_mito_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the VNC dataset for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    get_vnc_data(path, download)
    data_path = os.path.join(path, "vnc_train.h5")

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary,
    )

    raw_key = "raw"
    label_key = "labels/mitochondria"
    return torch_em.default_segmentation_dataset(data_path, raw_key, data_path, label_key, patch_shape, **kwargs)


def get_vnc_mito_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the VNC dataloader for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    # Forward only the dataset part of the split; passing the unsplit kwargs
    # would also hand the DataLoader options to the dataset factory.
    ds = get_vnc_mito_dataset(
        path, patch_shape, download=download, offsets=offsets, boundaries=boundaries, binary=binary, **ds_kwargs
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)


# TODO implement
def get_vnc_neuron_loader(path, patch_shape, download=False, **kwargs):
    """Placeholder for a neuron segmentation loader; always raises `NotImplementedError`."""
    raise NotImplementedError
URL =
'https://github.com/unidesigner/groundtruth-drosophila-vnc/archive/refs/heads/master.zip'
CHECKSUM =
'f7bd0db03c86b64440a16b60360ad60c0a4411f89e2c021c7ee2c8d6af3d7e86'
def
get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
def get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Fetch the VNC data and convert it to HDF5 volumes.

    The conversion writes `vnc_train.h5` (raw data plus mitochondria and synapse labels
    of stack1) and `vnc_test.h5` (raw data of stack2) into `path`. If both files
    exist already, nothing is downloaded or converted.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The folder `path` that holds the converted volumes.
    """
    h5_train = os.path.join(path, "vnc_train.h5")
    h5_test = os.path.join(path, "vnc_test.h5")
    if all(os.path.exists(h5_file) for h5_file in (h5_train, h5_test)):
        return path

    # Download the repository archive and extract it into the target folder.
    os.makedirs(path, exist_ok=True)
    archive = os.path.join(path, "vnc.zip")
    util.download_source(archive, URL, download, CHECKSUM)
    util.unzip(archive, path, remove=True)

    repo_dir = os.path.join(path, "groundtruth-drosophila-vnc-master")
    assert os.path.exists(repo_dir)

    train_stack = os.path.join(repo_dir, "stack1")
    # TODO find the post-processing to go from neuron labels to membrane labels
    # (the "membranes" folder of stack1 is therefore not converted yet)
    train_volumes = (
        ("raw", os.path.join(train_stack, "raw", "*.tif"), None),
        ("labels/mitochondria", os.path.join(train_stack, "mitochondria", "*.png"), label),
        ("labels/synapses", os.path.join(train_stack, "synapses", "*.png"), label),
    )
    with h5py.File(h5_train, "w") as train_file:
        for dataset_name, file_pattern, postprocess in train_volumes:
            _create_volume(train_file, dataset_name, file_pattern, process=postprocess)

    with h5py.File(h5_test, "w") as test_file:
        _create_volume(test_file, "raw", os.path.join(repo_dir, "stack2", "raw", "*.tif"))

    # The extracted repository is no longer needed after the conversion.
    rmtree(repo_dir)
    return path
Download the VNC training data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The path to the downloaded data.
def
get_vnc_mito_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_vnc_mito_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the VNC dataset for segmenting mitochondria in EM.

    Raw data and labels are both read from `vnc_train.h5` inside `path`,
    using the keys "raw" and "labels/mitochondria".

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # Make sure the converted volumes are available before building the dataset.
    get_vnc_data(path, download)

    # Let the shared helper add the label transform for the requested target type.
    dataset_kwargs, _ = util.add_instance_label_transform(
        kwargs,
        add_binary_target=True,
        boundaries=boundaries,
        offsets=offsets,
        binary=binary,
    )

    train_file = os.path.join(path, "vnc_train.h5")
    return torch_em.default_segmentation_dataset(
        train_file, "raw", train_file, "labels/mitochondria", patch_shape, **dataset_kwargs
    )
Get the VNC dataset for segmenting mitochondria in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to return a binary segmentation target.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_vnc_mito_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_vnc_mito_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the VNC dataloader for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    # Forward only the dataset part of the split; passing the unsplit kwargs
    # would also hand the DataLoader options to the dataset factory.
    ds = get_vnc_mito_dataset(
        path, patch_shape, download=download, offsets=offsets, boundaries=boundaries, binary=binary, **ds_kwargs
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
Get the VNC dataloader for segmenting mitochondria in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to return a binary segmentation target.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.
def
get_vnc_neuron_loader(path, patch_shape, download=False, **kwargs):