torch_em.data.datasets.electron_microscopy.vnc

The VNC dataset contains segmentation annotations for mitochondria in EM.

It contains two volumes from TEM of the drosophila brain. Please cite https://doi.org/10.6084/m9.figshare.856713.v1 if you use this dataset in your publication.

  1"""The VNC dataset contains segmentation annotations for mitochondria in EM.
  2
  3It contains two volumes from TEM of the drosophila brain.
  4Please cite https://doi.org/10.6084/m9.figshare.856713.v1 if you use this dataset in your publication.
  5"""
  6
  7import os
  8from glob import glob
  9from shutil import rmtree
 10from typing import List, Optional, Union, Tuple
 11
 12import imageio
 13import h5py
 14import numpy as np
 15import torch_em
 16from skimage.measure import label
 17from torch.utils.data import Dataset, DataLoader
 18
 19from .. import util
 20
 21URL = "https://github.com/unidesigner/groundtruth-drosophila-vnc/archive/refs/heads/master.zip"
 22CHECKSUM = "f7bd0db03c86b64440a16b60360ad60c0a4411f89e2c021c7ee2c8d6af3d7e86"
 23
 24
 25def _create_volume(f, key, pattern, process=None):
 26    images = glob(pattern)
 27    images.sort()
 28    data = np.concatenate([imageio.imread(im)[None] for im in images], axis=0)
 29    if process is not None:
 30        data = process(data)
 31    f.create_dataset(key, data=data, compression="gzip")
 32
 33
 34def get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
 35    """Download the VNC training data.
 36
 37    Args:
 38        path: Filepath to a folder where the downloaded data will be saved.
 39        download: Whether to download the data if it is not present.
 40
 41    Returns:
 42        The path to the downloaded data.
 43    """
 44
 45    train_path = os.path.join(path, "vnc_train.h5")
 46    test_path = os.path.join(path, "vnc_test.h5")
 47    if os.path.exists(train_path) and os.path.exists(test_path):
 48        return path
 49
 50    os.makedirs(path, exist_ok=True)
 51    zip_path = os.path.join(path, "vnc.zip")
 52    util.download_source(zip_path, URL, download, CHECKSUM)
 53    util.unzip(zip_path, path, remove=True)
 54
 55    root = os.path.join(path, "groundtruth-drosophila-vnc-master")
 56    assert os.path.exists(root)
 57
 58    with h5py.File(train_path, "w") as f:
 59        _create_volume(f, "raw", os.path.join(root, "stack1", "raw", "*.tif"))
 60        _create_volume(f, "labels/mitochondria", os.path.join(root, "stack1", "mitochondria", "*.png"), process=label)
 61        _create_volume(f, "labels/synapses", os.path.join(root, "stack1", "synapses", "*.png"), process=label)
 62        # TODO find the post-processing to go from neuron labels to membrane labels
 63        # _create_volume(f, "labels/neurons", os.path.join(root, "stack1", "membranes", "*.png"))
 64
 65    with h5py.File(test_path, "w") as f:
 66        _create_volume(f, "raw", os.path.join(root, "stack2", "raw", "*.tif"))
 67
 68    rmtree(root)
 69    return path
 70
 71
 72def get_vnc_mito_dataset(
 73    path: Union[os.PathLike, str],
 74    patch_shape: Tuple[int, int, int],
 75    offsets: Optional[List[List[int]]] = None,
 76    boundaries: bool = False,
 77    binary: bool = False,
 78    download: bool = False,
 79    **kwargs
 80) -> Dataset:
 81    """Get the VNC dataset for segmentating mitochondria in EM.
 82
 83    Args:
 84        path: Filepath to a folder where the downloaded data will be saved.
 85        patch_shape: The patch shape to use for training.
 86        offsets: Offset values for affinity computation used as target.
 87        boundaries: Whether to compute boundaries as the target.
 88        binary: Whether to return a binary segmentation target.
 89        download: Whether to download the data if it is not present.
 90        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 91
 92    Returns:
 93       The segmentation dataset.
 94    """
 95    get_vnc_data(path, download)
 96    data_path = os.path.join(path, "vnc_train.h5")
 97
 98    kwargs, _ = util.add_instance_label_transform(
 99        kwargs, add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary,
100    )
101
102    raw_key = "raw"
103    label_key = "labels/mitochondria"
104    return torch_em.default_segmentation_dataset(data_path, raw_key, data_path, label_key, patch_shape, **kwargs)
105
106
def get_vnc_mito_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the VNC dataloader for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    # Forward only the dataset part of the split. Passing the unsplit kwargs would
    # also hand the loader-only arguments to the dataset factory.
    ds = get_vnc_mito_dataset(
        path, patch_shape, download=download, offsets=offsets, boundaries=boundaries, binary=binary, **ds_kwargs
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
139
140
141# TODO implement
def get_vnc_neuron_loader(path, patch_shape, download=False, **kwargs):
    """Placeholder for a VNC neuron segmentation dataloader; not implemented yet.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()
URL = 'https://github.com/unidesigner/groundtruth-drosophila-vnc/archive/refs/heads/master.zip'
CHECKSUM = 'f7bd0db03c86b64440a16b60360ad60c0a4411f89e2c021c7ee2c8d6af3d7e86'
def get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
def get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the VNC data and convert it to HDF5 volumes.

    The result is stored as `vnc_train.h5` (raw data plus mitochondria and synapse labels
    from `stack1`) and `vnc_test.h5` (raw data from `stack2`) inside `path`.
    Nothing is done if both files already exist.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.
    """
    train_file, test_file = (os.path.join(path, name) for name in ("vnc_train.h5", "vnc_test.h5"))
    if all(os.path.exists(volume_file) for volume_file in (train_file, test_file)):
        return path

    os.makedirs(path, exist_ok=True)
    archive = os.path.join(path, "vnc.zip")
    util.download_source(archive, URL, download, CHECKSUM)
    util.unzip(archive, path, remove=True)

    # Folder name of the extracted repository archive.
    extracted = os.path.join(path, "groundtruth-drosophila-vnc-master")
    assert os.path.exists(extracted)

    stack1 = os.path.join(extracted, "stack1")
    # (dataset key, image glob pattern, optional post-processing) for the training file.
    # TODO find the post-processing to go from neuron labels to membrane labels,
    # then add ("labels/neurons", os.path.join(stack1, "membranes", "*.png"), ...)
    train_volumes = (
        ("raw", os.path.join(stack1, "raw", "*.tif"), None),
        ("labels/mitochondria", os.path.join(stack1, "mitochondria", "*.png"), label),
        ("labels/synapses", os.path.join(stack1, "synapses", "*.png"), label),
    )
    with h5py.File(train_file, "w") as f:
        for key, pattern, process in train_volumes:
            _create_volume(f, key, pattern, process=process)

    # The test file only holds raw data.
    with h5py.File(test_file, "w") as f:
        _create_volume(f, "raw", os.path.join(extracted, "stack2", "raw", "*.tif"))

    # The extracted images are no longer needed once both volumes are written.
    rmtree(extracted)
    return path

Download the VNC training data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The path to the downloaded data.

def get_vnc_mito_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 73def get_vnc_mito_dataset(
 74    path: Union[os.PathLike, str],
 75    patch_shape: Tuple[int, int, int],
 76    offsets: Optional[List[List[int]]] = None,
 77    boundaries: bool = False,
 78    binary: bool = False,
 79    download: bool = False,
 80    **kwargs
 81) -> Dataset:
 82    """Get the VNC dataset for segmentating mitochondria in EM.
 83
 84    Args:
 85        path: Filepath to a folder where the downloaded data will be saved.
 86        patch_shape: The patch shape to use for training.
 87        offsets: Offset values for affinity computation used as target.
 88        boundaries: Whether to compute boundaries as the target.
 89        binary: Whether to return a binary segmentation target.
 90        download: Whether to download the data if it is not present.
 91        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 92
 93    Returns:
 94       The segmentation dataset.
 95    """
 96    get_vnc_data(path, download)
 97    data_path = os.path.join(path, "vnc_train.h5")
 98
 99    kwargs, _ = util.add_instance_label_transform(
100        kwargs, add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary,
101    )
102
103    raw_key = "raw"
104    label_key = "labels/mitochondria"
105    return torch_em.default_segmentation_dataset(data_path, raw_key, data_path, label_key, patch_shape, **kwargs)

Get the VNC dataset for segmenting mitochondria in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to return a binary segmentation target.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_vnc_mito_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_vnc_mito_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the VNC dataloader for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    # Forward only the dataset part of the split. Passing the unsplit kwargs would
    # also hand the loader-only arguments to the dataset factory.
    ds = get_vnc_mito_dataset(
        path, patch_shape, download=download, offsets=offsets, boundaries=boundaries, binary=binary, **ds_kwargs
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)

Get the VNC dataloader for segmenting mitochondria in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to return a binary segmentation target.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.

def get_vnc_neuron_loader(path, patch_shape, download=False, **kwargs):
def get_vnc_neuron_loader(path, patch_shape, download=False, **kwargs):
    """Placeholder for a VNC neuron segmentation dataloader; not implemented yet.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()