torch_em.data.datasets.electron_microscopy.vnc

The VNC dataset contains segmentation annotations for mitochondria in electron microscopy (EM). It contains two volumes from transmission electron microscopy (TEM) of the Drosophila brain.

Please cite https://doi.org/10.6084/m9.figshare.856713.v1 if you use this dataset in your publication.

  1"""The VNC dataset contains segmentation annotations for mitochondria in EM.
  2It contains two volumes from TEM of the drosophila brain.
  3
  4Please cite https://doi.org/10.6084/m9.figshare.856713.v1 if you use this dataset in your publication.
  5"""
  6
  7import os
  8from glob import glob
  9from shutil import rmtree
 10from typing import List, Optional, Union, Tuple
 11
 12import imageio
 13import numpy as np
 14from bioimage_cpp.segmentation import label
 15
 16from torch.utils.data import Dataset, DataLoader
 17
 18import torch_em
 19
 20from .. import util
 21
 22
# Zip archive of the `master` branch of the ground-truth repository on GitHub.
# NOTE(review): a branch archive is not pinned to a commit, so its content (and with it
# the checksum below) may change when the repository is updated — confirm.
URL = "https://github.com/unidesigner/groundtruth-drosophila-vnc/archive/refs/heads/master.zip"
# Expected checksum of the archive; handed to `util.download_source` together with the URL.
# 64 hex digits, so presumably SHA-256 — confirm against `util.download_source`.
CHECKSUM = "f7bd0db03c86b64440a16b60360ad60c0a4411f89e2c021c7ee2c8d6af3d7e86"
 25
 26
 27def _create_volume(f, key, pattern, process=None):
 28    images = glob(pattern)
 29    images.sort()
 30    data = np.concatenate([imageio.imread(im)[None] for im in images], axis=0)
 31    if process is not None:
 32        data = process(data)
 33    f.create_dataset(key, data=data, compression="gzip")
 34
 35
 36def get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
 37    """Download the VNC training data.
 38
 39    Args:
 40        path: Filepath to a folder where the downloaded data will be saved.
 41        download: Whether to download the data if it is not present.
 42
 43    Returns:
 44        The path to the downloaded data.
 45    """
 46    import h5py
 47
 48    train_path = os.path.join(path, "vnc_train.h5")
 49    test_path = os.path.join(path, "vnc_test.h5")
 50    if os.path.exists(train_path) and os.path.exists(test_path):
 51        return path
 52
 53    os.makedirs(path, exist_ok=True)
 54    zip_path = os.path.join(path, "vnc.zip")
 55    util.download_source(zip_path, URL, download, CHECKSUM)
 56    util.unzip(zip_path, path, remove=True)
 57
 58    root = os.path.join(path, "groundtruth-drosophila-vnc-master")
 59    assert os.path.exists(root)
 60
 61    with h5py.File(train_path, "w") as f:
 62        _create_volume(f, "raw", os.path.join(root, "stack1", "raw", "*.tif"))
 63        _create_volume(f, "labels/mitochondria", os.path.join(root, "stack1", "mitochondria", "*.png"), process=label)
 64        _create_volume(f, "labels/synapses", os.path.join(root, "stack1", "synapses", "*.png"), process=label)
 65        # TODO find the post-processing to go from neuron labels to membrane labels
 66        # _create_volume(f, "labels/neurons", os.path.join(root, "stack1", "membranes", "*.png"))
 67
 68    with h5py.File(test_path, "w") as f:
 69        _create_volume(f, "raw", os.path.join(root, "stack2", "raw", "*.tif"))
 70
 71    rmtree(root)
 72    return path
 73
 74
 75def get_vnc_mito_paths(path: Union[os.PathLike, str], download: bool = False) -> str:
 76    """Get path to the VNC data.
 77
 78    Args:
 79        path: Filepath to a folder where the downloaded data is saved.
 80        download: Whether to download the data if it is not present.
 81
 82    Returns:
 83        The filepath to the stored data.
 84    """
 85    get_vnc_data(path, download)
 86    data_path = os.path.join(path, "vnc_train.h5")
 87    return data_path
 88
 89
 90def get_vnc_mito_dataset(
 91    path: Union[os.PathLike, str],
 92    patch_shape: Tuple[int, int, int],
 93    offsets: Optional[List[List[int]]] = None,
 94    boundaries: bool = False,
 95    binary: bool = False,
 96    download: bool = False,
 97    **kwargs
 98) -> Dataset:
 99    """Get the VNC dataset for segmentating mitochondria in EM.
100
101    Args:
102        path: Filepath to a folder where the downloaded data will be saved.
103        patch_shape: The patch shape to use for training.
104        offsets: Offset values for affinity computation used as target.
105        boundaries: Whether to compute boundaries as the target.
106        binary: Whether to return a binary segmentation target.
107        download: Whether to download the data if it is not present.
108        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
109
110    Returns:
111       The segmentation dataset.
112    """
113    data_path = get_vnc_mito_paths(path, download)
114
115    kwargs, _ = util.add_instance_label_transform(
116        kwargs, add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary,
117    )
118
119    return torch_em.default_segmentation_dataset(
120        raw_paths=data_path,
121        raw_key="raw",
122        label_paths=data_path,
123        label_key="labels/mitochondria",
124        patch_shape=patch_shape,
125        **kwargs
126    )
127
128
def get_vnc_mito_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the VNC dataloader for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the ones for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_vnc_mito_dataset(
        path=path,
        patch_shape=patch_shape,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
159
160
# TODO implement
def get_vnc_neuron_loader(path, patch_shape, download=False, **kwargs):
    """Placeholder for a dataloader for neuron segmentation in the VNC data.

    This function is not implemented yet and all arguments are ignored.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError(
        "The dataloader for neuron segmentation in the VNC data is not implemented yet. "
        "Only the mitochondria segmentation is available, see 'get_vnc_mito_loader'."
    )
# Zip archive of the `master` branch of the ground-truth repository on GitHub.
# NOTE(review): a branch archive is not pinned to a commit, so its content (and with it
# the checksum below) may change when the repository is updated — confirm.
URL = 'https://github.com/unidesigner/groundtruth-drosophila-vnc/archive/refs/heads/master.zip'
# Expected checksum of the archive; handed to `util.download_source` together with the URL.
# 64 hex digits, so presumably SHA-256 — confirm against `util.download_source`.
CHECKSUM = 'f7bd0db03c86b64440a16b60360ad60c0a4411f89e2c021c7ee2c8d6af3d7e86'
def get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
def get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the VNC data and convert it to HDF5.

    The archive is unpacked into `path` and its image stacks are converted into two files:
    `vnc_train.h5` (raw data plus mitochondria and synapse labels from `stack1`) and
    `vnc_test.h5` (raw data from `stack2`). Nothing is done if both files exist already.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the folder with the converted data (the `path` argument).

    Raises:
        RuntimeError: If the expected folder is missing after unpacking the archive.
    """
    train_path = os.path.join(path, "vnc_train.h5")
    test_path = os.path.join(path, "vnc_test.h5")
    if os.path.exists(train_path) and os.path.exists(test_path):
        return path

    # Imported lazily: h5py is only needed for the one-time conversion below.
    import h5py

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "vnc.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    util.unzip(zip_path, path, remove=True)

    root = os.path.join(path, "groundtruth-drosophila-vnc-master")
    # Raised explicitly instead of asserted, so the check also runs under `python -O`.
    if not os.path.exists(root):
        raise RuntimeError(f"Expected the unpacked data at '{root}', but this folder does not exist.")

    try:
        with h5py.File(train_path, "w") as f:
            _create_volume(f, "raw", os.path.join(root, "stack1", "raw", "*.tif"))
            _create_volume(
                f, "labels/mitochondria", os.path.join(root, "stack1", "mitochondria", "*.png"), process=label
            )
            _create_volume(f, "labels/synapses", os.path.join(root, "stack1", "synapses", "*.png"), process=label)
            # TODO find the post-processing to go from neuron labels to membrane labels
            # _create_volume(f, "labels/neurons", os.path.join(root, "stack1", "membranes", "*.png"))

        with h5py.File(test_path, "w") as f:
            _create_volume(f, "raw", os.path.join(root, "stack2", "raw", "*.tif"))
    except BaseException:
        # A half-written file would satisfy the existence check above on the next call,
        # so remove whatever has been written before re-raising.
        for out_path in (train_path, test_path):
            if os.path.exists(out_path):
                os.remove(out_path)
        raise
    finally:
        # The unpacked archive is only needed for the conversion.
        rmtree(root)

    return path

Download the VNC data and convert it to the HDF5 files `vnc_train.h5` and `vnc_test.h5`.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The path to the downloaded data.

def get_vnc_mito_paths(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_vnc_mito_paths(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Get the filepath of the VNC training volume.

    The data is made available through `get_vnc_data` first.

    Args:
        path: Filepath to a folder where the downloaded data is saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the stored training data (`vnc_train.h5`).
    """
    # `get_vnc_data` hands back the folder it was given.
    data_folder = get_vnc_data(path, download)
    return os.path.join(data_folder, "vnc_train.h5")

Get path to the VNC data.

Arguments:
  • path: Filepath to a folder where the downloaded data is saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the stored training data (`vnc_train.h5`).

def get_vnc_mito_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 91def get_vnc_mito_dataset(
 92    path: Union[os.PathLike, str],
 93    patch_shape: Tuple[int, int, int],
 94    offsets: Optional[List[List[int]]] = None,
 95    boundaries: bool = False,
 96    binary: bool = False,
 97    download: bool = False,
 98    **kwargs
 99) -> Dataset:
100    """Get the VNC dataset for segmentating mitochondria in EM.
101
102    Args:
103        path: Filepath to a folder where the downloaded data will be saved.
104        patch_shape: The patch shape to use for training.
105        offsets: Offset values for affinity computation used as target.
106        boundaries: Whether to compute boundaries as the target.
107        binary: Whether to return a binary segmentation target.
108        download: Whether to download the data if it is not present.
109        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
110
111    Returns:
112       The segmentation dataset.
113    """
114    data_path = get_vnc_mito_paths(path, download)
115
116    kwargs, _ = util.add_instance_label_transform(
117        kwargs, add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary,
118    )
119
120    return torch_em.default_segmentation_dataset(
121        raw_paths=data_path,
122        raw_key="raw",
123        label_paths=data_path,
124        label_key="labels/mitochondria",
125        patch_shape=patch_shape,
126        **kwargs
127    )

Get the VNC dataset for segmenting mitochondria in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to return a binary segmentation target.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_vnc_mito_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_vnc_mito_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the VNC dataloader for segmenting mitochondria in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the ones for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_vnc_mito_dataset(
        path=path,
        patch_shape=patch_shape,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)

Get the VNC dataloader for segmenting mitochondria in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to return a binary segmentation target.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.

def get_vnc_neuron_loader(path, patch_shape, download=False, **kwargs):
def get_vnc_neuron_loader(path, patch_shape, download=False, **kwargs):
    """Placeholder for a dataloader for neuron segmentation in the VNC data.

    This function is not implemented yet and all arguments are ignored.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError(
        "The dataloader for neuron segmentation in the VNC data is not implemented yet. "
        "Only the mitochondria segmentation is available, see 'get_vnc_mito_loader'."
    )