torch_em.data.datasets.electron_microscopy.vnc
The VNC dataset contains segmentation annotations for mitochondria in EM. It contains two volumes from TEM of the Drosophila brain.
Please cite https://doi.org/10.6084/m9.figshare.856713.v1 if you use this dataset in your publication.
"""The VNC dataset contains segmentation annotations for mitochondria in EM.
It contains two volumes from TEM of the Drosophila brain.

Please cite https://doi.org/10.6084/m9.figshare.856713.v1 if you use this dataset in your publication.
"""

import os
from glob import glob
from shutil import rmtree
from typing import List, Optional, Union, Tuple

import imageio
import numpy as np
from skimage.measure import label

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://github.com/unidesigner/groundtruth-drosophila-vnc/archive/refs/heads/master.zip"
CHECKSUM = "f7bd0db03c86b64440a16b60360ad60c0a4411f89e2c021c7ee2c8d6af3d7e86"


def _create_volume(f, key, pattern, process=None):
    """Stack all images matching a glob pattern and store them as one dataset.

    Args:
        f: Object exposing `create_dataset`; `get_vnc_data` passes an open `h5py.File`.
        key: Name of the dataset that is created in `f`.
        pattern: Glob pattern selecting the image files. Matches are read in sorted order.
        process: Optional callable applied to the stacked array before it is written.

    Raises:
        ValueError: If no file matches `pattern`.
    """
    images = sorted(glob(pattern))
    if not images:
        # Without this check np.stack / np.concatenate fails with a message
        # that does not mention which files were missing.
        raise ValueError(f"No images found for the pattern '{pattern}'.")

    # Each image becomes one slice along a new leading axis.
    data = np.stack([imageio.imread(im) for im in images], axis=0)
    if process is not None:
        data = process(data)
    f.create_dataset(key, data=data, compression="gzip")


def get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the VNC data and convert it to HDF5 files.

    The function writes `vnc_train.h5` (datasets `raw`, `labels/mitochondria` and
    `labels/synapses`, built from `stack1`) and `vnc_test.h5` (dataset `raw`, built
    from `stack2`) into `path`. If both files already exist nothing is done.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data, i.e. the folder `path` that was passed in.

    Raises:
        RuntimeError: If the extracted archive does not contain the expected folder.
    """
    import h5py

    train_path = os.path.join(path, "vnc_train.h5")
    test_path = os.path.join(path, "vnc_test.h5")
    if os.path.exists(train_path) and os.path.exists(test_path):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "vnc.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    util.unzip(zip_path, path, remove=True)

    root = os.path.join(path, "groundtruth-drosophila-vnc-master")
    # Explicit check instead of `assert`, which is skipped when Python runs with -O.
    if not os.path.exists(root):
        raise RuntimeError(f"The extracted VNC data was not found at the expected location '{root}'.")

    try:
        with h5py.File(train_path, "w") as f:
            _create_volume(f, "raw", os.path.join(root, "stack1", "raw", "*.tif"))
            # The label images are passed through `skimage.measure.label` before they are stored.
            _create_volume(
                f, "labels/mitochondria", os.path.join(root, "stack1", "mitochondria", "*.png"), process=label
            )
            _create_volume(f, "labels/synapses", os.path.join(root, "stack1", "synapses", "*.png"), process=label)
            # TODO find the post-processing to go from neuron labels to membrane labels
            # _create_volume(f, "labels/neurons", os.path.join(root, "stack1", "membranes", "*.png"))

        with h5py.File(test_path, "w") as f:
            _create_volume(f, "raw", os.path.join(root, "stack2", "raw", "*.tif"))
    except BaseException:
        # Remove partially written files: the existence check above would
        # otherwise treat them as a valid cache on the next call.
        for out_path in (train_path, test_path):
            if os.path.exists(out_path):
                os.remove(out_path)
        raise

    rmtree(root)
    return path


def get_vnc_mito_paths(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Get path to the VNC data.

    Args:
        path: Filepath to a folder where the downloaded data is saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the stored data, i.e. `vnc_train.h5` inside `path`.
    """
    get_vnc_data(path, download)
    return os.path.join(path, "vnc_train.h5")


def get_vnc_mito_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the VNC dataset for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    data_path = get_vnc_mito_paths(path, download)

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary,
    )

    # Raw data and labels are stored in the same HDF5 file under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=data_path,
        raw_key="raw",
        label_paths=data_path,
        label_key="labels/mitochondria",
        patch_shape=patch_shape,
        **kwargs
    )


def get_vnc_mito_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the VNC dataloader for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_vnc_mito_dataset(
        path, patch_shape, download=download, offsets=offsets, boundaries=boundaries, binary=binary, **ds_kwargs
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)


# TODO implement
def get_vnc_neuron_loader(path, patch_shape, download=False, **kwargs):
    """Placeholder for a neuron segmentation loader; currently always raises `NotImplementedError`."""
    raise NotImplementedError
URL =
'https://github.com/unidesigner/groundtruth-drosophila-vnc/archive/refs/heads/master.zip'
CHECKSUM =
'f7bd0db03c86b64440a16b60360ad60c0a4411f89e2c021c7ee2c8d6af3d7e86'
def
get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
def get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the VNC data and convert it to HDF5 files.

    The function writes `vnc_train.h5` (datasets `raw`, `labels/mitochondria` and
    `labels/synapses`, built from `stack1`) and `vnc_test.h5` (dataset `raw`, built
    from `stack2`) into `path`. If both files already exist nothing is done.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data, i.e. the folder `path` that was passed in.

    Raises:
        RuntimeError: If the extracted archive does not contain the expected folder.
    """
    import h5py

    train_path = os.path.join(path, "vnc_train.h5")
    test_path = os.path.join(path, "vnc_test.h5")
    if os.path.exists(train_path) and os.path.exists(test_path):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "vnc.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    util.unzip(zip_path, path, remove=True)

    root = os.path.join(path, "groundtruth-drosophila-vnc-master")
    # Explicit check instead of `assert`, which is skipped when Python runs with -O.
    if not os.path.exists(root):
        raise RuntimeError(f"The extracted VNC data was not found at the expected location '{root}'.")

    try:
        with h5py.File(train_path, "w") as f:
            _create_volume(f, "raw", os.path.join(root, "stack1", "raw", "*.tif"))
            # The label images are passed through `label` before they are stored.
            _create_volume(
                f, "labels/mitochondria", os.path.join(root, "stack1", "mitochondria", "*.png"), process=label
            )
            _create_volume(f, "labels/synapses", os.path.join(root, "stack1", "synapses", "*.png"), process=label)
            # TODO find the post-processing to go from neuron labels to membrane labels
            # _create_volume(f, "labels/neurons", os.path.join(root, "stack1", "membranes", "*.png"))

        with h5py.File(test_path, "w") as f:
            _create_volume(f, "raw", os.path.join(root, "stack2", "raw", "*.tif"))
    except BaseException:
        # Remove partially written files: the existence check above would
        # otherwise treat them as a valid cache on the next call.
        for out_path in (train_path, test_path):
            if os.path.exists(out_path):
                os.remove(out_path)
        raise

    rmtree(root)
    return path
Download the VNC training data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The path to the downloaded data.
def
get_vnc_mito_paths(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_vnc_mito_paths(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Return the location of the VNC training volume.

    `get_vnc_data` is called first, so that the data is prepared in `path`
    before the filepath is handed out.

    Args:
        path: Filepath to a folder where the downloaded data is saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the stored data, i.e. `vnc_train.h5` inside `path`.
    """
    get_vnc_data(path, download)
    return os.path.join(path, "vnc_train.h5")
Get path to the VNC data.
Arguments:
- path: Filepath to a folder where the downloaded data is saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the stored data.
def
get_vnc_mito_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_vnc_mito_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the VNC dataset for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    volume_path = get_vnc_mito_paths(path, download)

    # The target flags are forwarded to the helper, which hands back the
    # keyword arguments that are then used to build the dataset.
    dataset_kwargs, _ = util.add_instance_label_transform(
        kwargs,
        add_binary_target=True,
        boundaries=boundaries,
        offsets=offsets,
        binary=binary,
    )

    # Raw data and labels are read from the same file, under different keys.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=volume_path,
        raw_key="raw",
        label_paths=volume_path,
        label_key="labels/mitochondria",
        patch_shape=patch_shape,
        **dataset_kwargs
    )
    return dataset
Get the VNC dataset for segmenting mitochondria in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to return a binary segmentation target.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_vnc_mito_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_vnc_mito_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the VNC dataloader for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the extra keyword arguments into those for the dataset and those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_vnc_mito_dataset(
        path,
        patch_shape,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        download=download,
        **dataset_kwargs
    )

    loader = torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader
Get the VNC dataloader for segmenting mitochondria in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to return a binary segmentation target.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.
def
get_vnc_neuron_loader(path, patch_shape, download=False, **kwargs):