torch_em.data.datasets.electron_microscopy.vnc

The VNC dataset contains segmentation annotations for mitochondria in EM. It contains two volumes from TEM of the Drosophila ventral nerve cord (VNC).

Please cite https://doi.org/10.6084/m9.figshare.856713.v1 if you use this dataset in your publication.

  1"""The VNC dataset contains segmentation annotations for mitochondria in EM.
  2It contains two volumes from TEM of the drosophila brain.
  3
  4Please cite https://doi.org/10.6084/m9.figshare.856713.v1 if you use this dataset in your publication.
  5"""
  6
  7import os
  8from glob import glob
  9from shutil import rmtree
 10from typing import List, Optional, Union, Tuple
 11
 12import imageio
 13import numpy as np
 14from skimage.measure import label
 15
 16from torch.utils.data import Dataset, DataLoader
 17
 18import torch_em
 19
 20from .. import util
 21
 22
# Zip archive of the master branch of the ground-truth repository; downloaded by `get_vnc_data`.
URL = "https://github.com/unidesigner/groundtruth-drosophila-vnc/archive/refs/heads/master.zip"
# Checksum handed to `util.download_source` together with URL
# (64 hex digits, presumably SHA256 - TODO confirm against `util.download_source`).
CHECKSUM = "f7bd0db03c86b64440a16b60360ad60c0a4411f89e2c021c7ee2c8d6af3d7e86"
 25
 26
 27def _create_volume(f, key, pattern, process=None):
 28    images = glob(pattern)
 29    images.sort()
 30    data = np.concatenate([imageio.imread(im)[None] for im in images], axis=0)
 31    if process is not None:
 32        data = process(data)
 33    f.create_dataset(key, data=data, compression="gzip")
 34
 35
 36def get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
 37    """Download the VNC training data.
 38
 39    Args:
 40        path: Filepath to a folder where the downloaded data will be saved.
 41        download: Whether to download the data if it is not present.
 42
 43    Returns:
 44        The path to the downloaded data.
 45    """
 46    import h5py
 47
 48    train_path = os.path.join(path, "vnc_train.h5")
 49    test_path = os.path.join(path, "vnc_test.h5")
 50    if os.path.exists(train_path) and os.path.exists(test_path):
 51        return path
 52
 53    os.makedirs(path, exist_ok=True)
 54    zip_path = os.path.join(path, "vnc.zip")
 55    util.download_source(zip_path, URL, download, CHECKSUM)
 56    util.unzip(zip_path, path, remove=True)
 57
 58    root = os.path.join(path, "groundtruth-drosophila-vnc-master")
 59    assert os.path.exists(root)
 60
 61    with h5py.File(train_path, "w") as f:
 62        _create_volume(f, "raw", os.path.join(root, "stack1", "raw", "*.tif"))
 63        _create_volume(f, "labels/mitochondria", os.path.join(root, "stack1", "mitochondria", "*.png"), process=label)
 64        _create_volume(f, "labels/synapses", os.path.join(root, "stack1", "synapses", "*.png"), process=label)
 65        # TODO find the post-processing to go from neuron labels to membrane labels
 66        # _create_volume(f, "labels/neurons", os.path.join(root, "stack1", "membranes", "*.png"))
 67
 68    with h5py.File(test_path, "w") as f:
 69        _create_volume(f, "raw", os.path.join(root, "stack2", "raw", "*.tif"))
 70
 71    rmtree(root)
 72    return path
 73
 74
 75def get_vnc_mito_paths(path: Union[os.PathLike, str], download: bool = False) -> str:
 76    """Get path to the VNC data.
 77
 78    Args:
 79        path: Filepath to a folder where the downloaded data is saved.
 80        download: Whether to download the data if it is not present.
 81
 82    Returns:
 83        The filepath to the stored data.
 84    """
 85    get_vnc_data(path, download)
 86    data_path = os.path.join(path, "vnc_train.h5")
 87    return data_path
 88
 89
 90def get_vnc_mito_dataset(
 91    path: Union[os.PathLike, str],
 92    patch_shape: Tuple[int, int, int],
 93    offsets: Optional[List[List[int]]] = None,
 94    boundaries: bool = False,
 95    binary: bool = False,
 96    download: bool = False,
 97    **kwargs
 98) -> Dataset:
 99    """Get the VNC dataset for segmentating mitochondria in EM.
100
101    Args:
102        path: Filepath to a folder where the downloaded data will be saved.
103        patch_shape: The patch shape to use for training.
104        offsets: Offset values for affinity computation used as target.
105        boundaries: Whether to compute boundaries as the target.
106        binary: Whether to return a binary segmentation target.
107        download: Whether to download the data if it is not present.
108        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
109
110    Returns:
111       The segmentation dataset.
112    """
113    data_path = get_vnc_mito_paths(path, download)
114
115    kwargs, _ = util.add_instance_label_transform(
116        kwargs, add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary,
117    )
118
119    return torch_em.default_segmentation_dataset(
120        raw_paths=data_path,
121        raw_key="raw",
122        label_paths=data_path,
123        label_key="labels/mitochondria",
124        patch_shape=patch_shape,
125        **kwargs
126    )
127
128
def get_vnc_mito_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the VNC dataloader for segmenting mitochondria in EM.

    Args:
        path: Folder in which the VNC data is (or will be) stored.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_vnc_mito_dataset(
        path,
        patch_shape,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
159
160
# TODO implement
def get_vnc_neuron_loader(path, patch_shape, download=False, **kwargs):
    """Placeholder for a loader of the neuron annotations; not implemented yet.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()
# Zip archive of the master branch of the ground-truth repository; downloaded by `get_vnc_data`.
URL = 'https://github.com/unidesigner/groundtruth-drosophila-vnc/archive/refs/heads/master.zip'
# Checksum handed to `util.download_source` together with URL
# (64 hex digits, presumably SHA256 - TODO confirm against `util.download_source`).
CHECKSUM = 'f7bd0db03c86b64440a16b60360ad60c0a4411f89e2c021c7ee2c8d6af3d7e86'
def get_vnc_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the VNC training data and convert it to HDF5.

    The image stacks of the downloaded archive are written to `vnc_train.h5`
    (raw data, mitochondria and synapse labels of stack1) and `vnc_test.h5`
    (raw data of stack2). Nothing is done if both files already exist.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.

    Raises:
        RuntimeError: If the extracted archive does not contain the expected top-level folder.
    """
    train_path = os.path.join(path, "vnc_train.h5")
    test_path = os.path.join(path, "vnc_test.h5")
    if os.path.exists(train_path) and os.path.exists(test_path):
        return path

    # Only needed for the conversion, so it is imported after the early exit.
    import h5py

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "vnc.zip")
    util.download_source(zip_path, URL, download, CHECKSUM)
    util.unzip(zip_path, path, remove=True)

    root = os.path.join(path, "groundtruth-drosophila-vnc-master")
    # Explicit check instead of `assert`, which is stripped when running with -O.
    if not os.path.exists(root):
        raise RuntimeError(f"Expected the extracted VNC data at '{root}', but this folder does not exist.")

    try:
        with h5py.File(train_path, "w") as f:
            _create_volume(f, "raw", os.path.join(root, "stack1", "raw", "*.tif"))
            _create_volume(
                f, "labels/mitochondria", os.path.join(root, "stack1", "mitochondria", "*.png"), process=label
            )
            _create_volume(f, "labels/synapses", os.path.join(root, "stack1", "synapses", "*.png"), process=label)
            # TODO find the post-processing to go from neuron labels to membrane labels
            # _create_volume(f, "labels/neurons", os.path.join(root, "stack1", "membranes", "*.png"))

        with h5py.File(test_path, "w") as f:
            _create_volume(f, "raw", os.path.join(root, "stack2", "raw", "*.tif"))
    except BaseException:
        # A partially written file would pass the existence check above on the next call
        # and be mistaken for valid data, so remove the outputs before re-raising.
        for out_path in (train_path, test_path):
            if os.path.exists(out_path):
                os.remove(out_path)
        raise

    # The extracted images are no longer needed once both files are written.
    rmtree(root)
    return path

Download the VNC training data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The path to the downloaded data.

def get_vnc_mito_paths(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Return the location of the VNC training file, preparing the data first.

    Args:
        path: Folder in which the VNC data is (or will be) stored.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath of `vnc_train.h5` inside `path`.
    """
    # Make sure the data is available before handing out the file path.
    get_vnc_data(path, download)
    return os.path.join(path, "vnc_train.h5")

Get path to the VNC data.

Arguments:
  • path: Filepath to a folder where the downloaded data is saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the stored data.

def get_vnc_mito_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the VNC dataset for segmenting mitochondria in EM.

    Args:
        path: Folder in which the VNC data is (or will be) stored.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    volume_path = get_vnc_mito_paths(path, download)

    # Only the updated keyword arguments are used; the second return value is discarded.
    target_settings = dict(add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary)
    kwargs, _ = util.add_instance_label_transform(kwargs, **target_settings)

    # Raw data and labels are read from the same file, under different keys.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=volume_path,
        raw_key="raw",
        label_paths=volume_path,
        label_key="labels/mitochondria",
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset

Get the VNC dataset for segmenting mitochondria in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to return a binary segmentation target.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_vnc_mito_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the VNC dataloader for segmenting mitochondria in EM.

    Args:
        path: Folder in which the VNC data is (or will be) stored.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_vnc_mito_dataset(
        path,
        patch_shape,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)

Get the VNC dataloader for segmenting mitochondria in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to return a binary segmentation target.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.

def get_vnc_neuron_loader(path, patch_shape, download=False, **kwargs):
    """Placeholder for a loader of the neuron annotations; not implemented yet.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()