torch_em.data.datasets.electron_microscopy.mousecc
MouseCC is a dataset for myelin and intra-axonal space segmentation in 3D SEM images of the mouse brain genu of corpus callosum.
The volume contains 200 SEM slices at 24 x 24 x 100 nm resolution (36 x 48 x 20 um). Two segmentation labels are provided:
- myelin: binary mask from a pixel-wise classifier.
- fibers: intra-axonal space, uniquely labeled per axon via random-walker segmentation.
This dataset is from the publication https://doi.org/10.1007/s00429-019-01844-6. Please cite it if you use this dataset in your research.
The data is listed at https://datacatalog.med.nyu.edu/dataset/10432. It requires manual download after agreeing to the terms of use at: https://cai2r.net/resources/intra-axonal-space-segmented-from-3d-scanning-electron-microscopy-of-the-mouse-brain-genu-of-corpus-callosum/
After downloading, place the following four files in a local directory and pass it as path:
- datac.nii
- maskc.nii
- myelin_mask.nii
- fibers.nii
"""MouseCC is a dataset for myelin and intra-axonal space segmentation in 3D SEM images
of the mouse brain genu of corpus callosum.

The volume contains 200 SEM slices at 24 x 24 x 100 nm resolution (36 x 48 x 20 um).
Two segmentation labels are provided:
- myelin: binary mask from a pixel-wise classifier.
- fibers: intra-axonal space, uniquely labeled per axon via random-walker segmentation.

This dataset is from the publication https://doi.org/10.1007/s00429-019-01844-6.
Please cite it if you use this dataset in your research.

The data is listed at https://datacatalog.med.nyu.edu/dataset/10432.
It requires manual download after agreeing to the terms of use at:
https://cai2r.net/resources/intra-axonal-space-segmented-from-3d-scanning-electron-microscopy-of-the-mouse-brain-genu-of-corpus-callosum/

After downloading, place the following four files in a local directory and pass it as `path`:
- datac.nii
- maskc.nii
- myelin_mask.nii
- fibers.nii
"""

import os
from typing import Literal, Tuple, Union

import numpy as np
from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


MOUSECC_FILES = ["datac.nii", "maskc.nii", "myelin_mask.nii", "fibers.nii"]
MOUSECC_DOWNLOAD_URL = (
    "https://cai2r.net/resources/"
    "intra-axonal-space-segmented-from-3d-scanning-electron-microscopy-of-the-mouse-brain-genu-of-corpus-callosum/"
)


def _require_mousecc_files(path):
    """Raise a RuntimeError listing every expected NIfTI file that is missing from `path`."""
    missing = [f for f in MOUSECC_FILES if not os.path.exists(os.path.join(str(path), f))]
    if missing:
        raise RuntimeError(
            f"MouseCC files not found in {path}: {missing}\n"
            "This dataset requires manual download. Please fill in the form at:\n"
            f"{MOUSECC_DOWNLOAD_URL}\n"
            "and place datac.nii, maskc.nii, myelin_mask.nii, fibers.nii in the path directory."
        )


def _convert_to_h5(path):
    """Convert the four MouseCC NIfTI files in `path` into a single 'mousecc.h5' file.

    The conversion is skipped if 'mousecc.h5' already exists in `path`.
    The HDF5 file contains the datasets 'raw', 'labels/foreground', 'labels/myelin' and 'labels/fibers'.

    Args:
        path: Folder containing the manually downloaded NIfTI files.

    Returns:
        Path to the HDF5 file.

    Raises:
        RuntimeError: If any of the NIfTI files is missing or if nibabel is not installed.
    """
    h5_path = os.path.join(str(path), "mousecc.h5")
    # Check the cache first, so that already converted data can be used without nibabel.
    if os.path.exists(h5_path):
        return h5_path

    _require_mousecc_files(path)

    import h5py
    try:
        import nibabel as nib
    except ImportError as err:
        raise RuntimeError("nibabel is required to process MouseCC data: pip install nibabel") from err

    def load_nii(fname, dtype):
        # Moving the last axis to the front turns an (x, y, z) array into (z, x, y), so that
        # the slice axis comes first. The in-plane axes keep their original order.
        # NOTE(review): this assumes the NIfTI arrays are ordered (x, y, z). An earlier comment
        # claimed the result is (z, y, x), which would need a full transpose - confirm the intent.
        data = nib.load(os.path.join(str(path), fname)).get_fdata().astype(dtype)
        return np.moveaxis(data, -1, 0)

    raw = load_nii("datac.nii", "uint8")
    foreground = load_nii("maskc.nii", "uint8")
    myelin = load_nii("myelin_mask.nii", "uint8")
    fibers = load_nii("fibers.nii", "uint32")

    # Write to a temporary file and move it into place afterwards. Otherwise an interrupted
    # conversion would leave a truncated 'mousecc.h5' that is picked up by the cache check above.
    tmp_path = h5_path + ".tmp"
    try:
        with h5py.File(tmp_path, "w") as f:
            f.create_dataset("raw", data=raw, compression="gzip")
            f.create_dataset("labels/foreground", data=foreground, compression="gzip")
            f.create_dataset("labels/myelin", data=myelin, compression="gzip")
            f.create_dataset("labels/fibers", data=fibers, compression="gzip")
        os.replace(tmp_path, h5_path)
    finally:
        # Only true if the conversion failed before the file was moved into place.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    return h5_path


def get_mousecc_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Prepare the MouseCC data as an HDF5 file.

    Args:
        path: Filepath to the folder containing the manually downloaded NIfTI files.
            The folder is created if it does not exist yet.
        download: Must be False. Automatic download is not supported for this dataset,
            passing True raises a RuntimeError that points to the manual download page.

    Returns:
        Path to the converted HDF5 file.

    Raises:
        RuntimeError: If `download` is True, or if the NIfTI files are missing in `path`.
    """
    # Reject the unsupported option before touching the file system.
    if download:
        raise RuntimeError(
            "Automatic download is not supported for MouseCC.\n"
            "Please download the data manually from:\n"
            f"{MOUSECC_DOWNLOAD_URL}"
        )
    os.makedirs(str(path), exist_ok=True)
    return _convert_to_h5(path)


def get_mousecc_paths(
    path: Union[os.PathLike, str],
    label_choice: Literal["myelin", "fibers"] = "myelin",
    download: bool = False,
) -> Tuple[str, str]:
    """Get paths to the MouseCC HDF5 data.

    Args:
        path: Filepath to the folder containing the manually downloaded NIfTI files.
        label_choice: The segmentation target. Either 'myelin' or 'fibers'.
            It is only validated here and does not change the returned paths.
        download: Must be False. Automatic download is not supported for this dataset.

    Returns:
        Path to the HDF5 file for raw data.
        Path to the HDF5 file for labels. This is the same file as for the raw data.

    Raises:
        ValueError: If `label_choice` is neither 'myelin' nor 'fibers'.
    """
    if label_choice not in ("myelin", "fibers"):
        raise ValueError(f"Invalid label_choice: '{label_choice}'. Choose 'myelin' or 'fibers'.")
    h5_path = get_mousecc_data(path, download)
    return h5_path, h5_path


def get_mousecc_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    label_choice: Literal["myelin", "fibers"] = "myelin",
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Get the MouseCC dataset for myelin or intra-axonal space segmentation in SEM.

    Args:
        path: Filepath to the folder containing the manually downloaded NIfTI files.
        patch_shape: The patch shape to use for training.
        label_choice: The segmentation target. 'myelin' for the binary myelin mask,
            or 'fibers' for the intra-axonal space with unique labels per axon.
        download: Must be False. Automatic download is not supported for this dataset.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_path, label_path = get_mousecc_paths(path, label_choice, download)
    return torch_em.default_segmentation_dataset(
        raw_paths=raw_path, raw_key="raw",
        label_paths=label_path, label_key=f"labels/{label_choice}",
        patch_shape=patch_shape, **kwargs
    )


def get_mousecc_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    batch_size: int,
    label_choice: Literal["myelin", "fibers"] = "myelin",
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the DataLoader for myelin or intra-axonal space segmentation in MouseCC SEM data.

    Args:
        path: Filepath to the folder containing the manually downloaded NIfTI files.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        label_choice: The segmentation target. 'myelin' for the binary myelin mask,
            or 'fibers' for the intra-axonal space with unique labels per axon.
        download: Must be False. Automatic download is not supported for this dataset.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_mousecc_dataset(
        path, patch_shape, label_choice=label_choice, download=download, **ds_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def get_mousecc_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Prepare the MouseCC data as an HDF5 file.

    Args:
        path: Filepath to the folder containing the manually downloaded NIfTI files.
            The folder is created if it does not exist yet.
        download: Must be False. Automatic download is not supported for this dataset,
            passing True raises a RuntimeError that points to the manual download page.

    Returns:
        Path to the converted HDF5 file.

    Raises:
        RuntimeError: If `download` is True.
    """
    # Reject the unsupported option before touching the file system.
    if download:
        raise RuntimeError(
            "Automatic download is not supported for MouseCC.\n"
            "Please download the data manually from:\n"
            f"{MOUSECC_DOWNLOAD_URL}"
        )
    os.makedirs(str(path), exist_ok=True)
    return _convert_to_h5(path)
Prepare the MouseCC data as an HDF5 file.
Arguments:
- path: Filepath to the folder containing the manually downloaded NIfTI files.
- download: Must be False. This dataset requires manual download; passing True raises a RuntimeError.
Returns:
Path to the converted HDF5 file.
def get_mousecc_paths(
    path: Union[os.PathLike, str],
    label_choice: Literal["myelin", "fibers"] = "myelin",
    download: bool = False,
) -> Tuple[str, str]:
    """Return the raw and label file paths of the MouseCC data.

    Raw data and labels are stored in the same HDF5 file, so both returned paths are identical.

    Args:
        path: Filepath to the folder containing the manually downloaded NIfTI files.
        label_choice: The segmentation target. Either 'myelin' or 'fibers'.
            It is only validated here and does not change the returned paths.
        download: Passed on to `get_mousecc_data`, which does not support automatic download.

    Returns:
        Path to the HDF5 file for raw data.
        Path to the HDF5 file for labels.

    Raises:
        ValueError: If `label_choice` is neither 'myelin' nor 'fibers'.
    """
    supported_labels = ("myelin", "fibers")
    if label_choice in supported_labels:
        volume_path = get_mousecc_data(path, download)
        return volume_path, volume_path

    raise ValueError(f"Invalid label_choice: '{label_choice}'. Choose 'myelin' or 'fibers'.")
Get paths to the MouseCC HDF5 data.
Arguments:
- path: Filepath to the folder containing the manually downloaded NIfTI files.
- label_choice: The segmentation target. Either 'myelin' or 'fibers'.
- download: Must be False. This dataset requires manual download; passing True raises a RuntimeError.
Returns:
A tuple of two paths: the path to the HDF5 file for the raw data and the path to the HDF5 file for the labels. Both paths point to the same file.
def get_mousecc_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    label_choice: Literal["myelin", "fibers"] = "myelin",
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Create the MouseCC segmentation dataset for the selected label volume.

    Args:
        path: Filepath to the folder containing the manually downloaded NIfTI files.
        patch_shape: The patch shape to use for training.
        label_choice: The segmentation target. 'myelin' for the binary myelin mask,
            or 'fibers' for the intra-axonal space with unique labels per axon.
        download: Passed on to `get_mousecc_paths`.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    data_file, target_file = get_mousecc_paths(path, label_choice, download)
    # The label volumes are stored below the 'labels' group of the HDF5 file.
    target_key = f"labels/{label_choice}"
    return torch_em.default_segmentation_dataset(
        raw_paths=data_file,
        raw_key="raw",
        label_paths=target_file,
        label_key=target_key,
        patch_shape=patch_shape,
        **kwargs,
    )
Get the MouseCC dataset for myelin or intra-axonal space segmentation in SEM.
Arguments:
- path: Filepath to the folder containing the manually downloaded NIfTI files.
- patch_shape: The patch shape to use for training.
- label_choice: The segmentation target. 'myelin' for the binary myelin mask, or 'fibers' for the intra-axonal space with unique labels per axon.
- download: Must be False. This dataset requires manual download; passing True raises a RuntimeError.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
def get_mousecc_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    batch_size: int,
    label_choice: Literal["myelin", "fibers"] = "myelin",
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Create the DataLoader for the MouseCC SEM data and the selected label volume.

    Args:
        path: Filepath to the folder containing the manually downloaded NIfTI files.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        label_choice: The segmentation target. 'myelin' for the binary myelin mask,
            or 'fibers' for the intra-axonal space with unique labels per axon.
        download: Passed on to `get_mousecc_dataset`.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the ones for the data loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    mousecc_dataset = get_mousecc_dataset(
        path, patch_shape, label_choice=label_choice, download=download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(mousecc_dataset, batch_size, **dataloader_kwargs)
    return loader
Get the DataLoader for myelin or intra-axonal space segmentation in MouseCC SEM data.
Arguments:
- path: Filepath to the folder containing the manually downloaded NIfTI files.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- label_choice: The segmentation target. 'myelin' for the binary myelin mask, or 'fibers' for the intra-axonal space with unique labels per axon.
- download: Must be False. This dataset requires manual download; passing True raises a RuntimeError.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.