torch_em.data.datasets.electron_microscopy.vnc
The VNC dataset contains segmentation annotations for mitochondria in EM. It contains two volumes from TEM of the drosophila brain.
Please cite https://doi.org/10.6084/m9.figshare.856713.v1 if you use this dataset in your publication.
"""The VNC dataset contains segmentation annotations for mitochondria in EM.
It contains two volumes from TEM of the drosophila brain.

Please cite https://doi.org/10.6084/m9.figshare.856713.v1 if you use this dataset in your publication.
"""

import os
from glob import glob
from shutil import rmtree
from typing import List, Optional, Union, Tuple

import imageio
import numpy as np
from bioimage_cpp.segmentation import label

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://github.com/unidesigner/groundtruth-drosophila-vnc/archive/refs/heads/master.zip"
CHECKSUM = "f7bd0db03c86b64440a16b60360ad60c0a4411f89e2c021c7ee2c8d6af3d7e86"


def _create_volume(f, key, pattern, process=None):
    """Stack all images matching a glob pattern and store them as one dataset.

    Args:
        f: Open, writable container exposing `create_dataset` (an `h5py.File` in this module).
        key: Name of the dataset to create inside `f`.
        pattern: Glob pattern selecting the image files; matches are stacked in sorted filename order.
        process: Optional callable applied to the stacked array before it is written.

    Raises:
        ValueError: If no file matches `pattern`.
    """
    images = sorted(glob(pattern))
    if not images:
        # np.concatenate would also raise ValueError here, but without naming the pattern.
        raise ValueError(f"No images found for pattern '{pattern}', cannot create dataset '{key}'.")

    # Each image gets a new leading axis so that the files are stacked along axis 0.
    data = np.concatenate([imageio.imread(im)[None] for im in images], axis=0)
    if process is not None:
        data = process(data)
    f.create_dataset(key, data=data, compression="gzip")


def get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the VNC training data.

    The downloaded archive is converted into two HDF5 files inside `path`:
    "vnc_train.h5" (datasets "raw", "labels/mitochondria", "labels/synapses") and
    "vnc_test.h5" (dataset "raw"). Nothing is done if both files already exist.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data (the `path` argument, unchanged).

    Raises:
        RuntimeError: If the expected top-level folder is missing after unzipping the archive.
    """
    train_path = os.path.join(path, "vnc_train.h5")
    test_path = os.path.join(path, "vnc_test.h5")
    if os.path.exists(train_path) and os.path.exists(test_path):
        return path

    # Imported lazily: only the conversion below needs h5py.
    import h5py

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "vnc.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    util.unzip(zip_path, path, remove=True)

    root = os.path.join(path, "groundtruth-drosophila-vnc-master")
    # Explicit check instead of `assert`, which is stripped when Python runs with -O.
    if not os.path.exists(root):
        raise RuntimeError(f"Expected the extracted data at '{root}', but it does not exist after unzipping.")

    with h5py.File(train_path, "w") as f:
        _create_volume(f, "raw", os.path.join(root, "stack1", "raw", "*.tif"))
        _create_volume(f, "labels/mitochondria", os.path.join(root, "stack1", "mitochondria", "*.png"), process=label)
        _create_volume(f, "labels/synapses", os.path.join(root, "stack1", "synapses", "*.png"), process=label)
        # TODO find the post-processing to go from neuron labels to membrane labels
        # _create_volume(f, "labels/neurons", os.path.join(root, "stack1", "membranes", "*.png"))

    with h5py.File(test_path, "w") as f:
        _create_volume(f, "raw", os.path.join(root, "stack2", "raw", "*.tif"))

    # The extracted folder is no longer needed once both HDF5 files are written.
    rmtree(root)
    return path


def get_vnc_mito_paths(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Get path to the VNC data.

    Args:
        path: Filepath to a folder where the downloaded data is saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the stored data.
    """
    get_vnc_data(path, download)
    data_path = os.path.join(path, "vnc_train.h5")
    return data_path


def get_vnc_mito_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the VNC dataset for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    data_path = get_vnc_mito_paths(path, download)

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary,
    )

    # Raw data and labels are stored in the same HDF5 file under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=data_path,
        raw_key="raw",
        label_paths=data_path,
        label_key="labels/mitochondria",
        patch_shape=patch_shape,
        **kwargs
    )


def get_vnc_mito_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the VNC dataloader for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_vnc_mito_dataset(
        path, patch_shape, download=download, offsets=offsets, boundaries=boundaries, binary=binary, **ds_kwargs
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)


# TODO implement
def get_vnc_neuron_loader(path, patch_shape, download=False, **kwargs):
    """Placeholder for a neuron segmentation loader; always raises `NotImplementedError`."""
    raise NotImplementedError
URL =
'https://github.com/unidesigner/groundtruth-drosophila-vnc/archive/refs/heads/master.zip'
CHECKSUM =
'f7bd0db03c86b64440a16b60360ad60c0a4411f89e2c021c7ee2c8d6af3d7e86'
def
get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
def get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the VNC training data.

    The downloaded archive is converted into two HDF5 files inside `path`:
    "vnc_train.h5" (datasets "raw", "labels/mitochondria", "labels/synapses") and
    "vnc_test.h5" (dataset "raw"). Nothing is done if both files already exist.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data (the `path` argument, unchanged).

    Raises:
        RuntimeError: If the expected top-level folder is missing after unzipping the archive.
    """
    train_path = os.path.join(path, "vnc_train.h5")
    test_path = os.path.join(path, "vnc_test.h5")
    if os.path.exists(train_path) and os.path.exists(test_path):
        return path

    # Imported lazily: only the conversion below needs h5py.
    import h5py

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "vnc.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    util.unzip(zip_path, path, remove=True)

    root = os.path.join(path, "groundtruth-drosophila-vnc-master")
    # Explicit check instead of `assert`, which is stripped when Python runs with -O.
    if not os.path.exists(root):
        raise RuntimeError(f"Expected the extracted data at '{root}', but it does not exist after unzipping.")

    with h5py.File(train_path, "w") as f:
        _create_volume(f, "raw", os.path.join(root, "stack1", "raw", "*.tif"))
        _create_volume(f, "labels/mitochondria", os.path.join(root, "stack1", "mitochondria", "*.png"), process=label)
        _create_volume(f, "labels/synapses", os.path.join(root, "stack1", "synapses", "*.png"), process=label)
        # TODO find the post-processing to go from neuron labels to membrane labels
        # _create_volume(f, "labels/neurons", os.path.join(root, "stack1", "membranes", "*.png"))

    with h5py.File(test_path, "w") as f:
        _create_volume(f, "raw", os.path.join(root, "stack2", "raw", "*.tif"))

    # The extracted folder is no longer needed once both HDF5 files are written.
    rmtree(root)
    return path
Download the VNC training data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The path to the downloaded data.
def
get_vnc_mito_paths(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_vnc_mito_paths(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Return the location of the VNC training volume, fetching the data first if needed.

    Args:
        path: Filepath to a folder where the downloaded data is saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath of "vnc_train.h5" inside `path`.
    """
    # Make sure the converted volumes exist before handing out the file path.
    get_vnc_data(path, download)
    return os.path.join(path, "vnc_train.h5")
Get path to the VNC data.
Arguments:
- path: Filepath to a folder where the downloaded data is saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the stored data.
def
get_vnc_mito_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_vnc_mito_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the VNC dataset for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    train_volume = get_vnc_mito_paths(path, download)

    # The label-transform helper returns the updated dataset kwargs; its second return value is not needed here.
    dataset_kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary,
    )

    # Raw data and labels are read from the same HDF5 file under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=train_volume, raw_key="raw",
        label_paths=train_volume, label_key="labels/mitochondria",
        patch_shape=patch_shape,
        **dataset_kwargs
    )
Get the VNC dataset for segmenting mitochondria in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to return a binary segmentation target.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
def
get_vnc_mito_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_vnc_mito_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the VNC dataloader for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_vnc_mito_dataset(
        path, patch_shape, offsets=offsets, boundaries=boundaries, binary=binary, download=download, **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **loader_kwargs)
Get the VNC dataloader for segmenting mitochondria in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to return a binary segmentation target.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.
def
get_vnc_neuron_loader(path, patch_shape, download=False, **kwargs):