torch_em.data.datasets.electron_microscopy.mousecc

MouseCC is a dataset for myelin and intra-axonal space segmentation in 3D SEM images of the mouse brain genu of corpus callosum.

The volume contains 200 SEM slices at 24 x 24 x 100 nm resolution (36 x 48 x 20 um). Two segmentation labels are provided:

  • myelin: binary mask from a pixel-wise classifier.
  • fibers: intra-axonal space, uniquely labeled per axon via random-walker segmentation.

This dataset is from the publication https://doi.org/10.1007/s00429-019-01844-6. Please cite it if you use this dataset in your research.

The data is listed at https://datacatalog.med.nyu.edu/dataset/10432. It requires manual download after agreeing to the terms of use at: https://cai2r.net/resources/intra-axonal-space-segmented-from-3d-scanning-electron-microscopy-of-the-mouse-brain-genu-of-corpus-callosum/

After downloading, place the following four files in a local directory and pass that directory as the `path` argument:

  • datac.nii
  • maskc.nii
  • myelin_mask.nii
  • fibers.nii
  1"""MouseCC is a dataset for myelin and intra-axonal space segmentation in 3D SEM images
  2of the mouse brain genu of corpus callosum.
  3
  4The volume contains 200 SEM slices at 24 x 24 x 100 nm resolution (36 x 48 x 20 um).
  5Two segmentation labels are provided:
  6- myelin: binary mask from a pixel-wise classifier.
  7- fibers: intra-axonal space, uniquely labeled per axon via random-walker segmentation.
  8
  9This dataset is from the publication https://doi.org/10.1007/s00429-019-01844-6.
 10Please cite it if you use this dataset in your research.
 11
 12The data is listed at https://datacatalog.med.nyu.edu/dataset/10432.
 13It requires manual download after agreeing to the terms of use at:
 14https://cai2r.net/resources/intra-axonal-space-segmented-from-3d-scanning-electron-microscopy-of-the-mouse-brain-genu-of-corpus-callosum/
 15
 16After downloading, place the following four files in a local directory and pass it as `path`:
 17- datac.nii
 18- maskc.nii
 19- myelin_mask.nii
 20- fibers.nii
 21"""
 22
 23import os
 24from typing import Literal, Tuple, Union
 25
 26import numpy as np
 27from torch.utils.data import Dataset, DataLoader
 28
 29import torch_em
 30
 31from .. import util
 32
 33
 34MOUSECC_FILES = ["datac.nii", "maskc.nii", "myelin_mask.nii", "fibers.nii"]
 35MOUSECC_DOWNLOAD_URL = (
 36    "https://cai2r.net/resources/"
 37    "intra-axonal-space-segmented-from-3d-scanning-electron-microscopy-of-the-mouse-brain-genu-of-corpus-callosum/"
 38)
 39
 40
 41def _require_mousecc_files(path):
 42    missing = [f for f in MOUSECC_FILES if not os.path.exists(os.path.join(str(path), f))]
 43    if missing:
 44        raise RuntimeError(
 45            f"MouseCC files not found in {path}: {missing}\n"
 46            "This dataset requires manual download. Please fill in the form at:\n"
 47            f"{MOUSECC_DOWNLOAD_URL}\n"
 48            "and place datac.nii, maskc.nii, myelin_mask.nii, fibers.nii in the path directory."
 49        )
 50
 51
 52def _convert_to_h5(path):
 53    import h5py
 54    try:
 55        import nibabel as nib
 56    except ImportError:
 57        raise RuntimeError("nibabel is required to process MouseCC data: pip install nibabel")
 58
 59    h5_path = os.path.join(str(path), "mousecc.h5")
 60    if os.path.exists(h5_path):
 61        return h5_path
 62
 63    _require_mousecc_files(path)
 64
 65    def load_nii(fname, dtype):
 66        # nibabel returns (x, y, z); transpose to torch-em convention (z, y, x).
 67        data = nib.load(os.path.join(str(path), fname)).get_fdata().astype(dtype)
 68        return np.moveaxis(data, -1, 0)
 69
 70    raw = load_nii("datac.nii", "uint8")
 71    foreground = load_nii("maskc.nii", "uint8")
 72    myelin = load_nii("myelin_mask.nii", "uint8")
 73    fibers = load_nii("fibers.nii", "uint32")
 74
 75    with h5py.File(h5_path, "w") as f:
 76        f.create_dataset("raw", data=raw, compression="gzip")
 77        f.create_dataset("labels/foreground", data=foreground, compression="gzip")
 78        f.create_dataset("labels/myelin", data=myelin, compression="gzip")
 79        f.create_dataset("labels/fibers", data=fibers, compression="gzip")
 80
 81    return h5_path
 82
 83
 84def get_mousecc_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 85    """Prepare the MouseCC data as an HDF5 file.
 86
 87    Args:
 88        path: Filepath to the folder containing the manually downloaded NIfTI files.
 89        download: Ignored - this dataset requires manual download.
 90
 91    Returns:
 92        Path to the converted HDF5 file.
 93    """
 94    os.makedirs(str(path), exist_ok=True)
 95    if download:
 96        raise RuntimeError(
 97            "Automatic download is not supported for MouseCC.\n"
 98            "Please download the data manually from:\n"
 99            f"{MOUSECC_DOWNLOAD_URL}"
100        )
101    return _convert_to_h5(path)
102
103
def get_mousecc_paths(
    path: Union[os.PathLike, str],
    label_choice: Literal["myelin", "fibers"] = "myelin",
    download: bool = False,
) -> Tuple[str, str]:
    """Get the filepaths to the converted MouseCC data.

    Raw data and labels are stored in the same HDF5 file, so both returned paths are identical.

    Args:
        path: Filepath to the folder containing the manually downloaded NIfTI files.
        label_choice: The segmentation target. Either 'myelin' or 'fibers'.
        download: Passed on to `get_mousecc_data`. Automatic download is not supported
            for this dataset, so passing True results in a RuntimeError.

    Returns:
        Path to the HDF5 file for raw data.
        Path to the HDF5 file for labels.

    Raises:
        ValueError: If `label_choice` is neither 'myelin' nor 'fibers'.
    """
    supported_labels = ("myelin", "fibers")
    if label_choice in supported_labels:
        converted = get_mousecc_data(path, download)
        return converted, converted

    raise ValueError(f"Invalid label_choice: '{label_choice}'. Choose 'myelin' or 'fibers'.")
124
125
def get_mousecc_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    label_choice: Literal["myelin", "fibers"] = "myelin",
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Get the MouseCC dataset for myelin or intra-axonal space segmentation in SEM.

    Args:
        path: Filepath to the folder containing the manually downloaded NIfTI files.
        patch_shape: The patch shape to use for training.
        label_choice: The segmentation target. 'myelin' for the binary myelin mask,
            or 'fibers' for the intra-axonal space with unique labels per axon.
        download: Passed on to `get_mousecc_paths`. Automatic download is not supported
            for this dataset, so passing True results in a RuntimeError.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    h5_raw, h5_labels = get_mousecc_paths(path, label_choice, download)
    # The labels are stored per target in the 'labels' group of the HDF5 file.
    target_key = f"labels/{label_choice}"
    return torch_em.default_segmentation_dataset(
        raw_paths=h5_raw,
        raw_key="raw",
        label_paths=h5_labels,
        label_key=target_key,
        patch_shape=patch_shape,
        **kwargs,
    )
152
153
def get_mousecc_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    batch_size: int,
    label_choice: Literal["myelin", "fibers"] = "myelin",
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the DataLoader for myelin or intra-axonal space segmentation in MouseCC SEM data.

    Args:
        path: Filepath to the folder containing the manually downloaded NIfTI files.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        label_choice: The segmentation target. 'myelin' for the binary myelin mask,
            or 'fibers' for the intra-axonal space with unique labels per axon.
        download: Passed on to `get_mousecc_dataset`. Automatic download is not supported
            for this dataset, so passing True results in a RuntimeError.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the ones for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    return torch_em.get_data_loader(
        get_mousecc_dataset(path, patch_shape, label_choice=label_choice, download=download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )
# File names of the four NIfTI volumes that have to be present in the data folder.
MOUSECC_FILES = [
    "datac.nii",
    "maskc.nii",
    "myelin_mask.nii",
    "fibers.nii",
]
# Page with the form that has to be filled in before the data can be downloaded manually.
MOUSECC_DOWNLOAD_URL = (
    "https://cai2r.net/resources/"
    "intra-axonal-space-segmented-from-3d-scanning-electron-microscopy-of-the-mouse-brain-genu-of-corpus-callosum/"
)
def get_mousecc_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 85def get_mousecc_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 86    """Prepare the MouseCC data as an HDF5 file.
 87
 88    Args:
 89        path: Filepath to the folder containing the manually downloaded NIfTI files.
 90        download: Ignored - this dataset requires manual download.
 91
 92    Returns:
 93        Path to the converted HDF5 file.
 94    """
 95    os.makedirs(str(path), exist_ok=True)
 96    if download:
 97        raise RuntimeError(
 98            "Automatic download is not supported for MouseCC.\n"
 99            "Please download the data manually from:\n"
100            f"{MOUSECC_DOWNLOAD_URL}"
101        )
102    return _convert_to_h5(path)

Prepare the MouseCC data as an HDF5 file.

Arguments:
  • path: Filepath to the folder containing the manually downloaded NIfTI files.
  • download: Must be False. This dataset requires manual download; passing True raises a RuntimeError.
Returns:

Path to the converted HDF5 file.

def get_mousecc_paths( path: Union[os.PathLike, str], label_choice: Literal['myelin', 'fibers'] = 'myelin', download: bool = False) -> Tuple[str, str]:
def get_mousecc_paths(
    path: Union[os.PathLike, str],
    label_choice: Literal["myelin", "fibers"] = "myelin",
    download: bool = False,
) -> Tuple[str, str]:
    """Get the filepaths to the converted MouseCC data.

    Raw data and labels are stored in the same HDF5 file, so both returned paths are identical.

    Args:
        path: Filepath to the folder containing the manually downloaded NIfTI files.
        label_choice: The segmentation target. Either 'myelin' or 'fibers'.
        download: Passed on to `get_mousecc_data`. Automatic download is not supported
            for this dataset, so passing True results in a RuntimeError.

    Returns:
        Path to the HDF5 file for raw data.
        Path to the HDF5 file for labels.

    Raises:
        ValueError: If `label_choice` is neither 'myelin' nor 'fibers'.
    """
    supported_labels = ("myelin", "fibers")
    if label_choice in supported_labels:
        converted = get_mousecc_data(path, download)
        return converted, converted

    raise ValueError(f"Invalid label_choice: '{label_choice}'. Choose 'myelin' or 'fibers'.")

Get paths to the MouseCC HDF5 data.

Arguments:
  • path: Filepath to the folder containing the manually downloaded NIfTI files.
  • label_choice: The segmentation target. Either 'myelin' or 'fibers'.
  • download: Must be False. This dataset requires manual download; passing True raises a RuntimeError.
Returns:

Path to the HDF5 file for raw data. Path to the HDF5 file for labels.

def get_mousecc_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], label_choice: Literal['myelin', 'fibers'] = 'myelin', download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_mousecc_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    label_choice: Literal["myelin", "fibers"] = "myelin",
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Get the MouseCC dataset for myelin or intra-axonal space segmentation in SEM.

    Args:
        path: Filepath to the folder containing the manually downloaded NIfTI files.
        patch_shape: The patch shape to use for training.
        label_choice: The segmentation target. 'myelin' for the binary myelin mask,
            or 'fibers' for the intra-axonal space with unique labels per axon.
        download: Passed on to `get_mousecc_paths`. Automatic download is not supported
            for this dataset, so passing True results in a RuntimeError.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    h5_raw, h5_labels = get_mousecc_paths(path, label_choice, download)
    # The labels are stored per target in the 'labels' group of the HDF5 file.
    target_key = f"labels/{label_choice}"
    return torch_em.default_segmentation_dataset(
        raw_paths=h5_raw,
        raw_key="raw",
        label_paths=h5_labels,
        label_key=target_key,
        patch_shape=patch_shape,
        **kwargs,
    )

Get the MouseCC dataset for myelin or intra-axonal space segmentation in SEM.

Arguments:
  • path: Filepath to the folder containing the manually downloaded NIfTI files.
  • patch_shape: The patch shape to use for training.
  • label_choice: The segmentation target. 'myelin' for the binary myelin mask, or 'fibers' for the intra-axonal space with unique labels per axon.
  • download: Must be False. This dataset requires manual download; passing True raises a RuntimeError.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_mousecc_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], batch_size: int, label_choice: Literal['myelin', 'fibers'] = 'myelin', download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_mousecc_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    batch_size: int,
    label_choice: Literal["myelin", "fibers"] = "myelin",
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the DataLoader for myelin or intra-axonal space segmentation in MouseCC SEM data.

    Args:
        path: Filepath to the folder containing the manually downloaded NIfTI files.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        label_choice: The segmentation target. 'myelin' for the binary myelin mask,
            or 'fibers' for the intra-axonal space with unique labels per axon.
        download: Passed on to `get_mousecc_dataset`. Automatic download is not supported
            for this dataset, so passing True results in a RuntimeError.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the ones for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    return torch_em.get_data_loader(
        get_mousecc_dataset(path, patch_shape, label_choice=label_choice, download=download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )

Get the DataLoader for myelin or intra-axonal space segmentation in MouseCC SEM data.

Arguments:
  • path: Filepath to the folder containing the manually downloaded NIfTI files.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • label_choice: The segmentation target. 'myelin' for the binary myelin mask, or 'fibers' for the intra-axonal space with unique labels per axon.
  • download: Must be False. This dataset requires manual download; passing True raises a RuntimeError.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.