torch_em.data.datasets.electron_microscopy.synapseweb_hippocampus

The SynapseWeb hippocampus dataset contains three volumes of hippocampal CA1 neuropil from adult rat, imaged with serial section TEM at ~2x2x50 nm resolution. All axons, dendrites, glia, and synapses are reconstructed as instance segmentations.

The dataset is described in Harris et al. (2015): "A resource from 3D electron microscopy of hippocampal neuropil for user training and tool development" https://doi.org/10.1038/sdata.2015.46

Please cite this publication if you use this dataset in your research.

  1"""The SynapseWeb hippocampus dataset contains three volumes of hippocampal CA1 neuropil
  2from adult rat, imaged with serial section TEM at ~2x2x50 nm resolution. All axons,
  3dendrites, glia, and synapses are reconstructed as instance segmentations.
  4
  5The dataset is described in Harris et al. (2015):
  6"A resource from 3D electron microscopy of hippocampal neuropil for user training and tool development"
  7https://doi.org/10.1038/sdata.2015.46
  8
  9Please cite this publication if you use this dataset in your research.
 10"""
 11
 12import os
 13from typing import Any, Dict, List, Optional, Tuple, Union
 14
 15import h5py
 16import numpy as np
 17
 18from torch.utils.data import Dataset, DataLoader
 19
 20import torch_em
 21
 22from .. import util
 23
 24
# Names of the hippocampal regions (one volume per region) that can be requested.
REGIONS = ("spine", "oblique", "apical")

# Bounding boxes (x0, x1, y0, y1, z0, z1) of the annotated sub-regions within each CloudVolume.
# The bounds are given in voxels and are half-open, i.e. the end coordinates are exclusive.
ANNO_BBOXES = {
    "spine": (3072, 6144, 1536, 3840, 30, 80),
    "oblique": (512, 4608, 768, 4608, 6, 91),
    "apical": (2048, 6144, 2048, 6400, 55, 167),
}

# ROIs covering only the densely annotated cube within each downloaded volume,
# determined by finding contiguous slices with >5% label coverage per axis.
# The slices are relative to the downloaded volume and given in (z, y, x) order,
# matching the axis order of the datasets in the HDF5 files.
DENSE_ROIS = {
    "spine": np.s_[0:42, 784:1665, 1007:1944],
    "oblique": np.s_[5:75, 1243:3505, 1385:3215],
    "apical": np.s_[5:106, 217:3681, 477:3936],
}
 41
 42
 43def _download_volume(region, out_path):
 44    try:
 45        from cloudvolume import CloudVolume
 46    except ImportError:
 47        raise ImportError(
 48            "cloudvolume is required to download this data. Install it with: pip install cloud-volume"
 49        )
 50
 51    x0, x1, y0, y1, z0, z1 = ANNO_BBOXES[region]
 52    nx, ny, nz = x1 - x0, y1 - y0, z1 - z0
 53
 54    vol_em = CloudVolume(
 55        f"s3://open-neurodata/kharris15/{region}/em", mip=0, use_https=True, fill_missing=True
 56    )
 57    vol_anno = CloudVolume(
 58        f"s3://open-neurodata/kharris15/{region}/anno", mip=0, use_https=True, fill_missing=True
 59    )
 60
 61    # Download in z-slabs and write incrementally to avoid loading the full volume into memory.
 62    z_slab = 16
 63    with h5py.File(out_path, "w") as f:
 64        ds_raw = f.create_dataset("raw", shape=(nz, ny, nx), dtype="uint8", compression="gzip")
 65        ds_labels = f.create_dataset("labels", shape=(nz, ny, nx), dtype="uint64", compression="gzip")
 66        for z in range(z0, z1, z_slab):
 67            ze = min(z + z_slab, z1)
 68            # CloudVolume returns (x, y, z, channel); squeeze and transpose to (z, y, x).
 69            slab_raw = np.array(vol_em[x0:x1, y0:y1, z:ze]).squeeze().transpose(2, 1, 0)
 70            slab_labels = np.array(vol_anno[x0:x1, y0:y1, z:ze]).squeeze().transpose(2, 1, 0)
 71            zi = z - z0
 72            ds_raw[zi:zi + ze - z] = slab_raw
 73            ds_labels[zi:zi + ze - z] = slab_labels
 74
 75
 76def get_synapseweb_hippocampus_data(path: Union[os.PathLike, str], region: str, download: bool):
 77    """Download the SynapseWeb hippocampus data for a given region and store it as an HDF5 file.
 78
 79    Args:
 80        path: Filepath to a folder where the data will be saved.
 81        region: The region to download. One of 'spine', 'oblique', 'apical'.
 82        download: Whether to download the data if it is not present.
 83    """
 84    if region not in REGIONS:
 85        raise ValueError(f"'{region}' is not a valid region. Choose from {REGIONS}.")
 86
 87    os.makedirs(path, exist_ok=True)
 88    out_path = os.path.join(path, f"synapseweb_hippocampus_{region}.h5")
 89    if os.path.exists(out_path):
 90        return
 91
 92    if not download:
 93        raise RuntimeError(
 94            f"SynapseWeb hippocampus data for region '{region}' not found at {out_path}. "
 95            "Pass download=True to download it."
 96        )
 97
 98    _download_volume(region, out_path)
 99
100
def get_synapseweb_hippocampus_paths(
    path: Union[os.PathLike, str],
    regions: Tuple[str, ...] = ("spine", "oblique", "apical"),
    download: bool = False,
) -> List[str]:
    """Get the filepaths of the SynapseWeb hippocampus HDF5 files for the requested regions.

    Args:
        path: Filepath to a folder where the data will be saved.
        regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the HDF5 files, in the same order as `regions`.
    """
    # Make sure that the volume of every requested region is available before collecting the paths.
    for name in regions:
        get_synapseweb_hippocampus_data(path, name, download)

    file_names = (f"synapseweb_hippocampus_{name}.h5" for name in regions)
    return [os.path.join(path, file_name) for file_name in file_names]
119
120
def get_synapseweb_hippocampus_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    regions: Tuple[str, ...] = ("spine", "oblique", "apical"),
    rois: Optional[Dict[str, Any]] = None,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> Dataset:
    """Get the SynapseWeb hippocampus dataset for neuron segmentation in serial section TEM.

    Args:
        path: Filepath to a folder where the data will be saved.
        patch_shape: The patch shape to use for training. Must have three entries.
        regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
        rois: Dict mapping region name to a region of interest slice. Regions without an entry,
            or all regions if None or an empty dict is passed, use the densely annotated
            sub-cube of the region given in `DENSE_ROIS`.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        ValueError: If `patch_shape` does not have exactly three entries.
    """
    # Validate with an exception rather than an assert, which is skipped when running with -O.
    if len(patch_shape) != 3:
        raise ValueError(f"Expected a 3D patch shape, got {patch_shape}.")

    # None replaces a mutable default argument and means that no custom ROIs are given.
    if rois is None:
        rois = {}

    data_paths = get_synapseweb_hippocampus_paths(path, regions, download)
    data_rois = [rois.get(region, DENSE_ROIS[region]) for region in regions]

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )
    kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)

    # Raw data and labels of a region are stored in the same file under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=data_paths,
        raw_key="raw",
        label_paths=data_paths,
        label_key="labels",
        patch_shape=patch_shape,
        rois=data_rois,
        **kwargs
    )
166
167
def get_synapseweb_hippocampus_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int, int],
    regions: Tuple[str, ...] = ("spine", "oblique", "apical"),
    rois: Optional[Dict[str, Any]] = None,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the DataLoader for neuron segmentation in the SynapseWeb hippocampus dataset.

    Args:
        path: Filepath to a folder where the data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
        rois: Dict mapping region name to a region of interest slice. Defaults to the
            densely annotated sub-cube per region.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_synapseweb_hippocampus_dataset(
        path=path,
        patch_shape=patch_shape,
        regions=regions,
        # None replaces a mutable default argument; always forward a dict to the dataset function.
        rois={} if rois is None else rois,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        **ds_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
REGIONS = ('spine', 'oblique', 'apical')
ANNO_BBOXES = {'spine': (3072, 6144, 1536, 3840, 30, 80), 'oblique': (512, 4608, 768, 4608, 6, 91), 'apical': (2048, 6144, 2048, 6400, 55, 167)}
DENSE_ROIS = {'spine': (slice(0, 42, None), slice(784, 1665, None), slice(1007, 1944, None)), 'oblique': (slice(5, 75, None), slice(1243, 3505, None), slice(1385, 3215, None)), 'apical': (slice(5, 106, None), slice(217, 3681, None), slice(477, 3936, None))}
def get_synapseweb_hippocampus_data(path: Union[os.PathLike, str], region: str, download: bool):
def get_synapseweb_hippocampus_data(path: Union[os.PathLike, str], region: str, download: bool):
    """Make sure the HDF5 file with the SynapseWeb hippocampus data of a region is available.

    The data is expected at `<path>/synapseweb_hippocampus_<region>.h5`. If this file already
    exists nothing is done, otherwise the data is downloaded to it if `download` is set.

    Args:
        path: Filepath to a folder where the data will be saved. It is created if it does not exist.
        region: The region to download. One of 'spine', 'oblique', 'apical'.
        download: Whether to download the data if it is not present.

    Raises:
        ValueError: If `region` is not one of the available regions.
        RuntimeError: If the data is not present and `download` is False.
    """
    if region not in REGIONS:
        raise ValueError(f"'{region}' is not a valid region. Choose from {REGIONS}.")

    os.makedirs(path, exist_ok=True)
    volume_file = os.path.join(path, f"synapseweb_hippocampus_{region}.h5")

    # Only act if the volume for this region has not been stored yet.
    if not os.path.exists(volume_file):
        if download:
            _download_volume(region, volume_file)
        else:
            raise RuntimeError(
                f"SynapseWeb hippocampus data for region '{region}' not found at {volume_file}. "
                "Pass download=True to download it."
            )

Download the SynapseWeb hippocampus data for a given region and store it as an HDF5 file.

Arguments:
  • path: Filepath to a folder where the data will be saved.
  • region: The region to download. One of 'spine', 'oblique', 'apical'.
  • download: Whether to download the data if it is not present.
def get_synapseweb_hippocampus_paths( path: Union[os.PathLike, str], regions: Tuple[str, ...] = ('spine', 'oblique', 'apical'), download: bool = False) -> List[str]:
def get_synapseweb_hippocampus_paths(
    path: Union[os.PathLike, str],
    regions: Tuple[str, ...] = ("spine", "oblique", "apical"),
    download: bool = False,
) -> List[str]:
    """Get the filepaths of the SynapseWeb hippocampus HDF5 files for the requested regions.

    Args:
        path: Filepath to a folder where the data will be saved.
        regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the HDF5 files, in the same order as `regions`.
    """
    # Make sure that the volume of every requested region is available before collecting the paths.
    for name in regions:
        get_synapseweb_hippocampus_data(path, name, download)

    file_names = (f"synapseweb_hippocampus_{name}.h5" for name in regions)
    return [os.path.join(path, file_name) for file_name in file_names]

Get paths to the SynapseWeb hippocampus HDF5 files.

Arguments:
  • path: Filepath to a folder where the data will be saved.
  • regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths to the HDF5 files.

def get_synapseweb_hippocampus_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], regions: Tuple[str, ...] = ('spine', 'oblique', 'apical'), rois: Dict[str, Any] = {}, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_synapseweb_hippocampus_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    regions: Tuple[str, ...] = ("spine", "oblique", "apical"),
    rois: Optional[Dict[str, Any]] = None,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> Dataset:
    """Get the SynapseWeb hippocampus dataset for neuron segmentation in serial section TEM.

    Args:
        path: Filepath to a folder where the data will be saved.
        patch_shape: The patch shape to use for training. Must have three entries.
        regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
        rois: Dict mapping region name to a region of interest slice. Regions without an entry,
            or all regions if None or an empty dict is passed, use the densely annotated
            sub-cube of the region given in `DENSE_ROIS`.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        ValueError: If `patch_shape` does not have exactly three entries.
    """
    # Validate with an exception rather than an assert, which is skipped when running with -O.
    if len(patch_shape) != 3:
        raise ValueError(f"Expected a 3D patch shape, got {patch_shape}.")

    # None replaces a mutable default argument and means that no custom ROIs are given.
    if rois is None:
        rois = {}

    data_paths = get_synapseweb_hippocampus_paths(path, regions, download)
    data_rois = [rois.get(region, DENSE_ROIS[region]) for region in regions]

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )
    kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)

    # Raw data and labels of a region are stored in the same file under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=data_paths,
        raw_key="raw",
        label_paths=data_paths,
        label_key="labels",
        patch_shape=patch_shape,
        rois=data_rois,
        **kwargs
    )

Get the SynapseWeb hippocampus dataset for neuron segmentation in serial section TEM.

Arguments:
  • path: Filepath to a folder where the data will be saved.
  • patch_shape: The patch shape to use for training.
  • regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
  • rois: Dict mapping region name to a region of interest slice. Defaults to the densely annotated sub-cube per region.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_synapseweb_hippocampus_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int, int], regions: Tuple[str, ...] = ('spine', 'oblique', 'apical'), rois: Dict[str, Any] = {}, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_synapseweb_hippocampus_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int, int],
    regions: Tuple[str, ...] = ("spine", "oblique", "apical"),
    rois: Optional[Dict[str, Any]] = None,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the DataLoader for neuron segmentation in the SynapseWeb hippocampus dataset.

    Args:
        path: Filepath to a folder where the data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
        rois: Dict mapping region name to a region of interest slice. Defaults to the
            densely annotated sub-cube per region.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_synapseweb_hippocampus_dataset(
        path=path,
        patch_shape=patch_shape,
        regions=regions,
        # None replaces a mutable default argument; always forward a dict to the dataset function.
        rois={} if rois is None else rois,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        **ds_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)

Get the DataLoader for neuron segmentation in the SynapseWeb hippocampus dataset.

Arguments:
  • path: Filepath to a folder where the data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
  • rois: Dict mapping region name to a region of interest slice. Defaults to the densely annotated sub-cube per region.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.