torch_em.data.datasets.electron_microscopy.fib25
FIB-25 is a dataset for neuron segmentation in EM.
It contains FIB-SEM data and segmentation ground truth from the Drosophila medulla, as part of the FlyEM project at Janelia Research Campus.
The dataset is from the publication https://doi.org/10.1073/pnas.1509820112. Please cite this publication if you use the dataset in your research.
The data is hosted at https://github.com/google/ffn via Google Cloud Storage.
"""FIB-25 is a dataset for neuron segmentation in EM.

It contains FIB-SEM data and segmentation ground truth from the Drosophila medulla,
as part of the FlyEM project at Janelia Research Campus.

The dataset is from the publication https://doi.org/10.1073/pnas.1509820112.
Please cite this publication if you use the dataset in your research.

The data is hosted at https://github.com/google/ffn via Google Cloud Storage.
"""

import os
from typing import List, Optional, Tuple, Union

import numpy as np

import torch_em

from torch.utils.data import Dataset, DataLoader

from .. import util


GCS_BUCKET = "https://storage.googleapis.com/ffn-flyem-fib25"

# Download locations of the raw volume and the groundtruth file, per sample.
URLS = {
    "training_sample2": {
        "raw": f"{GCS_BUCKET}/training_sample2/grayscale_maps.h5",
        "labels": f"{GCS_BUCKET}/training_sample2/groundtruth.h5",
    },
    "validation_sample": {
        "raw": f"{GCS_BUCKET}/validation_sample/grayscale_maps.h5",
        "labels": f"{GCS_BUCKET}/validation_sample/groundtruth.h5",
    },
    "tstvol-520-1": {
        "raw": f"{GCS_BUCKET}/tstvol-520-1/raw.h5",
        "labels": f"{GCS_BUCKET}/tstvol-520-1/groundtruth.h5",
    },
}

# Checksums passed to `util.download_source`, keyed like `URLS`.
CHECKSUMS = {
    "training_sample2": {
        "raw": "ea031c98ee2de778a9a3a1e6d410df5de73e4ac28022df8e7255d84e3394cafa",
        "labels": "fd508e7aee1fe51ac9ae0460db4a841d275236f013c1f2552314b4f21b1010ea",
    },
    "validation_sample": {
        "raw": "400ccb2a7268a3880c63656e0d794f8e6252e62031869455cc8caeef245b2a83",
        "labels": "2c5e31af0af5476bc9669b88d01a4570a26eb020799eaf6131aa75f2f7d92e98",
    },
    "tstvol-520-1": {
        "raw": "0667e701c8b4464003d8a6cb0cf9deb2aa79fb415ec51deeac92e5f9c67a5a66",
        "labels": "ae61ae78a9874eb35ae8e5ed29b4cbfe7bbd07a61789ddb70aef4deb2532eb4e",
    },
}

SAMPLES = list(URLS.keys())


def _apply_transforms(groundtruth_path):
    """Apply the supervoxel-to-neuron mapping from the 'transforms' dataset.

    The groundtruth h5 files contain a 'stack' dataset with supervoxel IDs
    and a 'transforms' dataset that maps supervoxels to neuron body IDs.
    This function applies the mapping and saves the result as 'neuron_ids'.
    Supervoxel IDs without an entry in 'transforms' are mapped to 0.
    If 'neuron_ids' already exists in the file, nothing is done.

    Args:
        groundtruth_path: Filepath to the groundtruth h5 file. The file is opened
            in append mode and the 'neuron_ids' dataset is added to it.
    """
    import h5py

    with h5py.File(groundtruth_path, "a") as f:
        if "neuron_ids" in f:
            return

        stack = f["stack"][:]
        transforms = f["transforms"][:]

        # Size the lookup table so that it covers every supervoxel ID in the stack
        # AND every source ID listed in 'transforms'; a source ID beyond the stack
        # maximum would otherwise raise an IndexError in the loop below.
        # The maximum is computed with Python ints, so the "+ 1" cannot wrap around
        # in fixed-width integer arithmetic when the largest ID equals the dtype maximum.
        max_id = max([int(stack.max())] + [int(src) for src, _ in transforms])

        # Build the mapping from supervoxel IDs to neuron body IDs.
        mapping = np.zeros(max_id + 1, dtype=stack.dtype)
        for src, dst in transforms:
            mapping[src] = dst
        neuron_ids = mapping[stack]

        f.create_dataset("neuron_ids", data=neuron_ids, compression="gzip")


def get_fib25_data(
    path: Union[os.PathLike, str], samples: Tuple[str, ...], download: bool = False
):
    """Download the FIB-25 dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        samples: The samples to download. Available samples are
            'training_sample2', 'validation_sample', and 'tstvol-520-1'.
        download: Whether to download the data if it is not present.

    Raises:
        ValueError: If one of the requested samples is not available.
    """
    # Validate all sample names before touching the filesystem or the network,
    # so that a typo is reported before any other sample has been processed.
    # An explicit exception is used because assert statements are removed under `python -O`.
    samples = tuple(samples)
    for sample in samples:
        if sample not in URLS:
            raise ValueError(f"Invalid sample: {sample}. Choose from {SAMPLES}.")

    os.makedirs(path, exist_ok=True)
    for sample in samples:
        urls = URLS[sample]
        checksums = CHECKSUMS[sample]

        sample_dir = os.path.join(path, sample)
        os.makedirs(sample_dir, exist_ok=True)

        # The files are stored under fixed names, independent of the remote file name.
        raw_path = os.path.join(sample_dir, "raw.h5")
        labels_path = os.path.join(sample_dir, "groundtruth.h5")

        util.download_source(raw_path, urls["raw"], download, checksum=checksums["raw"])
        util.download_source(labels_path, urls["labels"], download, checksum=checksums["labels"])

        # Apply the supervoxel-to-neuron mapping.
        _apply_transforms(labels_path)


def get_fib25_paths(
    path: Union[os.PathLike, str],
    samples: Tuple[str, ...] = ("training_sample2",),
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the FIB-25 data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        samples: The samples to use. Available samples are
            'training_sample2', 'validation_sample', and 'tstvol-520-1'.
        download: Whether to download the data if it is not present.

    Returns:
        The filepaths to the raw data and the label data.
    """
    get_fib25_data(path, samples, download)
    raw_paths = [os.path.join(path, sample, "raw.h5") for sample in samples]
    label_paths = [os.path.join(path, sample, "groundtruth.h5") for sample in samples]
    return raw_paths, label_paths


def get_fib25_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    samples: Tuple[str, ...] = ("training_sample2",),
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> Dataset:
    """Get the FIB-25 dataset for the segmentation of neurons in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        samples: The samples to use. Available samples are
            'training_sample2', 'validation_sample', and 'tstvol-520-1'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert len(patch_shape) == 3

    raw_paths, label_paths = get_fib25_paths(path, samples, download)

    kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)
    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )

    # The labels are read from 'neuron_ids', the dataset written by `_apply_transforms`.
    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key="raw",
        label_paths=label_paths,
        label_key="neuron_ids",
        patch_shape=patch_shape,
        **kwargs,
    )


def get_fib25_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    samples: Tuple[str, ...] = ("training_sample2",),
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the DataLoader for EM neuron segmentation in the FIB-25 dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        samples: The samples to use. Available samples are
            'training_sample2', 'validation_sample', and 'tstvol-520-1'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_fib25_dataset(
        path=path,
        patch_shape=patch_shape,
        samples=samples,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        **ds_kwargs,
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
GCS_BUCKET =
'https://storage.googleapis.com/ffn-flyem-fib25'
URLS =
{'training_sample2': {'raw': 'https://storage.googleapis.com/ffn-flyem-fib25/training_sample2/grayscale_maps.h5', 'labels': 'https://storage.googleapis.com/ffn-flyem-fib25/training_sample2/groundtruth.h5'}, 'validation_sample': {'raw': 'https://storage.googleapis.com/ffn-flyem-fib25/validation_sample/grayscale_maps.h5', 'labels': 'https://storage.googleapis.com/ffn-flyem-fib25/validation_sample/groundtruth.h5'}, 'tstvol-520-1': {'raw': 'https://storage.googleapis.com/ffn-flyem-fib25/tstvol-520-1/raw.h5', 'labels': 'https://storage.googleapis.com/ffn-flyem-fib25/tstvol-520-1/groundtruth.h5'}}
CHECKSUMS =
{'training_sample2': {'raw': 'ea031c98ee2de778a9a3a1e6d410df5de73e4ac28022df8e7255d84e3394cafa', 'labels': 'fd508e7aee1fe51ac9ae0460db4a841d275236f013c1f2552314b4f21b1010ea'}, 'validation_sample': {'raw': '400ccb2a7268a3880c63656e0d794f8e6252e62031869455cc8caeef245b2a83', 'labels': '2c5e31af0af5476bc9669b88d01a4570a26eb020799eaf6131aa75f2f7d92e98'}, 'tstvol-520-1': {'raw': '0667e701c8b4464003d8a6cb0cf9deb2aa79fb415ec51deeac92e5f9c67a5a66', 'labels': 'ae61ae78a9874eb35ae8e5ed29b4cbfe7bbd07a61789ddb70aef4deb2532eb4e'}}
SAMPLES =
['training_sample2', 'validation_sample', 'tstvol-520-1']
def
get_fib25_data( path: Union[os.PathLike, str], samples: Tuple[str, ...], download: bool = False):
def get_fib25_data(
    path: Union[os.PathLike, str], samples: Tuple[str, ...], download: bool = False
):
    """Download the FIB-25 dataset.

    For every requested sample the raw volume and the groundtruth file are passed
    to `util.download_source` and stored as `<path>/<sample>/raw.h5` and
    `<path>/<sample>/groundtruth.h5`. Afterwards the supervoxel-to-neuron mapping
    is applied to the groundtruth file via `_apply_transforms`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        samples: The samples to download. Available samples are
            'training_sample2', 'validation_sample', and 'tstvol-520-1'.
        download: Whether to download the data if it is not present.

    Raises:
        ValueError: If one of the requested samples is not available.
    """
    # Validate all sample names before touching the filesystem or the network,
    # so that a typo is reported before any other sample has been processed.
    # An explicit exception is used because assert statements are removed under `python -O`.
    samples = tuple(samples)
    for sample in samples:
        if sample not in URLS:
            raise ValueError(f"Invalid sample: {sample}. Choose from {SAMPLES}.")

    os.makedirs(path, exist_ok=True)
    for sample in samples:
        urls = URLS[sample]
        checksums = CHECKSUMS[sample]

        sample_dir = os.path.join(path, sample)
        os.makedirs(sample_dir, exist_ok=True)

        # The files are stored under fixed names, independent of the remote file name.
        raw_path = os.path.join(sample_dir, "raw.h5")
        labels_path = os.path.join(sample_dir, "groundtruth.h5")

        util.download_source(raw_path, urls["raw"], download, checksum=checksums["raw"])
        util.download_source(labels_path, urls["labels"], download, checksum=checksums["labels"])

        # Apply the supervoxel-to-neuron mapping.
        _apply_transforms(labels_path)
Download the FIB-25 dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- samples: The samples to download. Available samples are 'training_sample2', 'validation_sample', and 'tstvol-520-1'.
- download: Whether to download the data if it is not present.
def
get_fib25_paths( path: Union[os.PathLike, str], samples: Tuple[str, ...] = ('training_sample2',), download: bool = False) -> Tuple[List[str], List[str]]:
def get_fib25_paths(
    path: Union[os.PathLike, str],
    samples: Tuple[str, ...] = ("training_sample2",),
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the FIB-25 data.

    Calls `get_fib25_data` for the requested samples first and then assembles
    the per-sample file locations, in the order given by `samples`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        samples: The samples to use. Available samples are
            'training_sample2', 'validation_sample', and 'tstvol-520-1'.
        download: Whether to download the data if it is not present.

    Returns:
        The filepaths to the raw data and the label data.
    """
    # Make sure the data for all requested samples has been prepared.
    get_fib25_data(path, samples, download)

    raw_paths: List[str] = []
    label_paths: List[str] = []
    for name in samples:
        # Each sample has its own sub-folder holding both files.
        volume_dir = os.path.join(path, name)
        raw_paths.append(os.path.join(volume_dir, "raw.h5"))
        label_paths.append(os.path.join(volume_dir, "groundtruth.h5"))

    return raw_paths, label_paths
Get paths to the FIB-25 data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- samples: The samples to use. Available samples are 'training_sample2', 'validation_sample', and 'tstvol-520-1'.
- download: Whether to download the data if it is not present.
Returns:
The filepaths to the raw data and the label data.
def
get_fib25_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], samples: Tuple[str, ...] = ('training_sample2',), download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_fib25_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    samples: Tuple[str, ...] = ("training_sample2",),
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> Dataset:
    """Get the FIB-25 dataset for the segmentation of neurons in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training. Must have three entries.
        samples: The samples to use. Available samples are
            'training_sample2', 'validation_sample', and 'tstvol-520-1'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert len(patch_shape) == 3

    volume_paths, segmentation_paths = get_fib25_paths(path, samples, download)

    # Mark the dataset as a segmentation dataset, then let the shared helper
    # configure the label transform from the `boundaries` / `offsets` settings.
    dataset_kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)
    dataset_kwargs, _ = util.add_instance_label_transform(
        dataset_kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )

    # Raw data is read from the 'raw' key and labels from the 'neuron_ids' key.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=volume_paths,
        raw_key="raw",
        label_paths=segmentation_paths,
        label_key="neuron_ids",
        patch_shape=patch_shape,
        **dataset_kwargs,
    )
    return dataset
Get the FIB-25 dataset for the segmentation of neurons in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- samples: The samples to use. Available samples are 'training_sample2', 'validation_sample', and 'tstvol-520-1'.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
def
get_fib25_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, samples: Tuple[str, ...] = ('training_sample2',), download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_fib25_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    samples: Tuple[str, ...] = ("training_sample2",),
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the DataLoader for EM neuron segmentation in the FIB-25 dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        samples: The samples to use. Available samples are
            'training_sample2', 'validation_sample', and 'tstvol-520-1'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining
    # ones, which are forwarded to the data loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )

    dataset = get_fib25_dataset(
        path=path,
        patch_shape=patch_shape,
        samples=samples,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        **dataset_kwargs,
    )

    loader = torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader
Get the DataLoader for EM neuron segmentation in the FIB-25 dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- samples: The samples to use. Available samples are 'training_sample2', 'validation_sample', and 'tstvol-520-1'.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.