torch_em.data.datasets.electron_microscopy.synapseweb_hippocampus
The SynapseWeb hippocampus dataset contains three volumes of hippocampal CA1 neuropil from adult rat, imaged with serial section TEM at ~2x2x50 nm resolution. All axons, dendrites, glia, and synapses are reconstructed as instance segmentations.
The dataset is described in Harris et al. (2015): "A resource from 3D electron microscopy of hippocampal neuropil for user training and tool development" https://doi.org/10.1038/sdata.2015.46
Please cite this publication if you use this dataset in your research.
"""The SynapseWeb hippocampus dataset contains three volumes of hippocampal CA1 neuropil
from adult rat, imaged with serial section TEM at ~2x2x50 nm resolution. All axons,
dendrites, glia, and synapses are reconstructed as instance segmentations.

The dataset is described in Harris et al. (2015):
"A resource from 3D electron microscopy of hippocampal neuropil for user training and tool development"
https://doi.org/10.1038/sdata.2015.46

Please cite this publication if you use this dataset in your research.
"""

import os
from typing import Any, Dict, List, Optional, Tuple, Union

import h5py
import numpy as np

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


REGIONS = ("spine", "oblique", "apical")

# Bounding boxes (x0, x1, y0, y1, z0, z1) of the annotated sub-regions within each CloudVolume.
ANNO_BBOXES = {
    "spine": (3072, 6144, 1536, 3840, 30, 80),
    "oblique": (512, 4608, 768, 4608, 6, 91),
    "apical": (2048, 6144, 2048, 6400, 55, 167),
}

# ROIs covering only the densely annotated cube within each downloaded volume,
# determined by finding contiguous slices with >5% label coverage per axis.
DENSE_ROIS = {
    "spine": np.s_[0:42, 784:1665, 1007:1944],
    "oblique": np.s_[5:75, 1243:3505, 1385:3215],
    "apical": np.s_[5:106, 217:3681, 477:3936],
}


def _download_volume(region: str, out_path: Union[os.PathLike, str]) -> None:
    """Download the annotated sub-volume of a region and write it to an HDF5 file.

    The file gets two gzip-compressed datasets in (z, y, x) order:
    'raw' (uint8) and 'labels' (uint64).

    Args:
        region: The region to download. Must be a key of `ANNO_BBOXES`.
        out_path: Filepath of the HDF5 file to create. An existing file is overwritten.

    Raises:
        ImportError: If cloudvolume is not installed.
    """
    try:
        from cloudvolume import CloudVolume
    except ImportError as err:
        raise ImportError(
            "cloudvolume is required to download this data. Install it with: pip install cloud-volume"
        ) from err

    x0, x1, y0, y1, z0, z1 = ANNO_BBOXES[region]
    nx, ny, nz = x1 - x0, y1 - y0, z1 - z0

    vol_em = CloudVolume(
        f"s3://open-neurodata/kharris15/{region}/em", mip=0, use_https=True, fill_missing=True
    )
    vol_anno = CloudVolume(
        f"s3://open-neurodata/kharris15/{region}/anno", mip=0, use_https=True, fill_missing=True
    )

    # Download in z-slabs and write incrementally to avoid loading the full volume into memory.
    z_slab = 16
    with h5py.File(out_path, "w") as f:
        ds_raw = f.create_dataset("raw", shape=(nz, ny, nx), dtype="uint8", compression="gzip")
        ds_labels = f.create_dataset("labels", shape=(nz, ny, nx), dtype="uint64", compression="gzip")
        for z in range(z0, z1, z_slab):
            ze = min(z + z_slab, z1)
            # CloudVolume returns (x, y, z, channel). Drop only the channel axis: a bare
            # squeeze() would also drop z for a single-slice slab and break the transpose.
            slab_raw = np.array(vol_em[x0:x1, y0:y1, z:ze]).squeeze(axis=-1).transpose(2, 1, 0)
            slab_labels = np.array(vol_anno[x0:x1, y0:y1, z:ze]).squeeze(axis=-1).transpose(2, 1, 0)
            # Dataset indices are relative to the start of the bounding box.
            ds_raw[z - z0:ze - z0] = slab_raw
            ds_labels[z - z0:ze - z0] = slab_labels


def get_synapseweb_hippocampus_data(path: Union[os.PathLike, str], region: str, download: bool):
    """Download the SynapseWeb hippocampus data for a given region and store it as an HDF5 file.

    Nothing is downloaded if the file for the region already exists.

    Args:
        path: Filepath to a folder where the data will be saved.
        region: The region to download. One of 'spine', 'oblique', 'apical'.
        download: Whether to download the data if it is not present.

    Raises:
        ValueError: If `region` is not a valid region.
        RuntimeError: If the data is not present and `download` is False.
    """
    if region not in REGIONS:
        raise ValueError(f"'{region}' is not a valid region. Choose from {REGIONS}.")

    os.makedirs(path, exist_ok=True)
    out_path = os.path.join(path, f"synapseweb_hippocampus_{region}.h5")
    if os.path.exists(out_path):
        return

    if not download:
        raise RuntimeError(
            f"SynapseWeb hippocampus data for region '{region}' not found at {out_path}. "
            "Pass download=True to download it."
        )

    # Download to a temporary file and move it into place only on success. Otherwise an
    # interrupted download leaves a partial file that later calls treat as complete.
    tmp_path = f"{out_path}.tmp"
    try:
        _download_volume(region, tmp_path)
        os.replace(tmp_path, out_path)
    finally:
        # After a successful replace the temporary file is gone and this is a no-op.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def get_synapseweb_hippocampus_paths(
    path: Union[os.PathLike, str],
    regions: Tuple[str, ...] = ("spine", "oblique", "apical"),
    download: bool = False,
) -> List[str]:
    """Get paths to the SynapseWeb hippocampus HDF5 files.

    Args:
        path: Filepath to a folder where the data will be saved.
        regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the HDF5 files.
    """
    for region in regions:
        get_synapseweb_hippocampus_data(path, region, download)
    return [os.path.join(path, f"synapseweb_hippocampus_{region}.h5") for region in regions]


def get_synapseweb_hippocampus_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    regions: Tuple[str, ...] = ("spine", "oblique", "apical"),
    rois: Optional[Dict[str, Any]] = None,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> Dataset:
    """Get the SynapseWeb hippocampus dataset for neuron segmentation in serial section TEM.

    Args:
        path: Filepath to a folder where the data will be saved.
        patch_shape: The patch shape to use for training.
        regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
        rois: Dict mapping region name to a region of interest slice. Regions without an
            entry (or all regions, if None) use the densely annotated sub-cube per region.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert len(patch_shape) == 3, "patch_shape must have exactly three entries"
    rois = {} if rois is None else rois

    data_paths = get_synapseweb_hippocampus_paths(path, regions, download)
    data_rois = [rois.get(region, DENSE_ROIS[region]) for region in regions]

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )
    kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)

    return torch_em.default_segmentation_dataset(
        raw_paths=data_paths,
        raw_key="raw",
        label_paths=data_paths,
        label_key="labels",
        patch_shape=patch_shape,
        rois=data_rois,
        **kwargs
    )


def get_synapseweb_hippocampus_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int, int],
    regions: Tuple[str, ...] = ("spine", "oblique", "apical"),
    rois: Optional[Dict[str, Any]] = None,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the DataLoader for neuron segmentation in the SynapseWeb hippocampus dataset.

    Args:
        path: Filepath to a folder where the data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
        rois: Dict mapping region name to a region of interest slice. Regions without an
            entry (or all regions, if None) use the densely annotated sub-cube per region.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_synapseweb_hippocampus_dataset(
        path,
        patch_shape,
        regions=regions,
        rois=rois,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        **ds_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def get_synapseweb_hippocampus_data(path: Union[os.PathLike, str], region: str, download: bool):
    """Download the SynapseWeb hippocampus data for a given region and store it as an HDF5 file.

    Nothing is downloaded if the file for the region already exists.

    Args:
        path: Filepath to a folder where the data will be saved.
        region: The region to download. One of 'spine', 'oblique', 'apical'.
        download: Whether to download the data if it is not present.

    Raises:
        ValueError: If `region` is not a valid region.
        RuntimeError: If the data is not present and `download` is False.
    """
    if region not in REGIONS:
        raise ValueError(f"'{region}' is not a valid region. Choose from {REGIONS}.")

    os.makedirs(path, exist_ok=True)
    out_path = os.path.join(path, f"synapseweb_hippocampus_{region}.h5")
    if os.path.exists(out_path):
        return

    if not download:
        raise RuntimeError(
            f"SynapseWeb hippocampus data for region '{region}' not found at {out_path}. "
            "Pass download=True to download it."
        )

    # Download to a temporary file and move it into place only on success. Otherwise an
    # interrupted download leaves a partial file that later calls treat as complete.
    tmp_path = f"{out_path}.tmp"
    try:
        _download_volume(region, tmp_path)
        os.replace(tmp_path, out_path)
    finally:
        # After a successful replace the temporary file is gone and this is a no-op.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
Download the SynapseWeb hippocampus data for a given region and store it as an HDF5 file.
Arguments:
- path: Filepath to a folder where the data will be saved.
- region: The region to download. One of 'spine', 'oblique', 'apical'.
- download: Whether to download the data if it is not present.
def get_synapseweb_hippocampus_paths(
    path: Union[os.PathLike, str],
    regions: Tuple[str, ...] = ("spine", "oblique", "apical"),
    download: bool = False,
) -> List[str]:
    """Return the HDF5 filepaths of the requested SynapseWeb hippocampus regions.

    Each region is first checked (and downloaded if requested) via
    `get_synapseweb_hippocampus_data`; the paths are built afterwards.

    Args:
        path: Filepath to a folder where the data will be saved.
        regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the HDF5 files, in the order of `regions`.
    """
    # First pass: make sure every requested region is available on disk.
    for name in regions:
        get_synapseweb_hippocampus_data(path, name, download)

    # Second pass: assemble the filepaths.
    volume_paths = []
    for name in regions:
        file_name = f"synapseweb_hippocampus_{name}.h5"
        volume_paths.append(os.path.join(path, file_name))
    return volume_paths
Get paths to the SynapseWeb hippocampus HDF5 files.
Arguments:
- path: Filepath to a folder where the data will be saved.
- regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths to the HDF5 files.
def get_synapseweb_hippocampus_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    regions: Tuple[str, ...] = ("spine", "oblique", "apical"),
    rois: Optional[Dict[str, Any]] = None,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> Dataset:
    """Get the SynapseWeb hippocampus dataset for neuron segmentation in serial section TEM.

    Args:
        path: Filepath to a folder where the data will be saved.
        patch_shape: The patch shape to use for training.
        regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
        rois: Dict mapping region name to a region of interest slice. Regions without an
            entry (or all regions, if None) use the densely annotated sub-cube per region.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert len(patch_shape) == 3, "patch_shape must have exactly three entries"
    # A None default avoids sharing one mutable dict between all calls.
    rois = {} if rois is None else rois

    data_paths = get_synapseweb_hippocampus_paths(path, regions, download)
    data_rois = [rois.get(region, DENSE_ROIS[region]) for region in regions]

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )
    kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)

    return torch_em.default_segmentation_dataset(
        raw_paths=data_paths,
        raw_key="raw",
        label_paths=data_paths,
        label_key="labels",
        patch_shape=patch_shape,
        rois=data_rois,
        **kwargs
    )
Get the SynapseWeb hippocampus dataset for neuron segmentation in serial section TEM.
Arguments:
- path: Filepath to a folder where the data will be saved.
- patch_shape: The patch shape to use for training.
- regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
- rois: Dict mapping region name to a region of interest slice. Defaults to the densely annotated sub-cube per region.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
def get_synapseweb_hippocampus_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int, int],
    regions: Tuple[str, ...] = ("spine", "oblique", "apical"),
    rois: Optional[Dict[str, Any]] = None,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the DataLoader for neuron segmentation in the SynapseWeb hippocampus dataset.

    Args:
        path: Filepath to a folder where the data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
        rois: Dict mapping region name to a region of interest slice. Regions without an
            entry (or all regions, if None) use the densely annotated sub-cube per region.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_synapseweb_hippocampus_dataset(
        path,
        patch_shape,
        regions=regions,
        # Always forward a dict; a None default avoids a shared mutable default here.
        rois={} if rois is None else rois,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        **ds_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
Get the DataLoader for neuron segmentation in the SynapseWeb hippocampus dataset.
Arguments:
- path: Filepath to a folder where the data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- regions: The regions to use. Subset of 'spine', 'oblique', 'apical'.
- rois: Dict mapping region name to a region of interest slice. Defaults to the densely annotated sub-cube per region.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.