torch_em.data.datasets.electron_microscopy.microns_nuclei

This dataset contains crops of EM data with annotated nuclei from mouse cortex.

The data is extracted from https://doi.org/10.1038/s41586-025-08790-w, which contains a segmentation of all nuclei in the cubic millimeter of mouse cortex imaged as part of the MICrONS project. Please cite it if you use this dataset for a publication.

  1"""This dataset contains crops of EM data with annotated nuclei from mouse cortex.
  2
  3The data is extracted from https://doi.org/10.1038/s41586-025-08790-w, which contains a segmentation
  4of all nuclei in the cubic millimeter of mouse cortex imaged as part of the MICrONS project.
  5Please cite it if you use this dataset for a publication.
  6"""
  7
  8import os
  9from glob import glob
 10from typing import Tuple, Union, Literal, List
 11
 12import torch_em
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16from .. import util
 17
 18
 19URL = "https://owncloud.gwdg.de/index.php/s/ToLGAzg1FAV4Sxf/download"
 20CHECKSUM = "36afcc963aea597faf991f6844537d2330739a89aa05c1a91fea31f2b4dc2de4"
 21
 22
 23def get_microns_nuclei_data(
 24    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool
 25) -> str:
 26    """Download the MICRONS Nucleus data.
 27
 28    Args:
 29        path: Filepath to a folder where the downloaded data will be saved.
 30        split: The split to use. One of 'train', 'val', 'test'.
 31        download: Whether to download the data if it is not present.
 32
 33    Returns:
 34        The filepath to the downloaded data.
 35    """
 36    assert split in ("train", "val", "test")
 37    split_folder = os.path.join(path, split)
 38    if not os.path.exists(split_folder):
 39        os.makedirs(path, exist_ok=True)
 40        zip_path = os.path.join(path, "microns_nucleus_data.zip")
 41        util.download_source(zip_path, URL, download, CHECKSUM)
 42        util.unzip(zip_path, path, remove=True)
 43    return split_folder
 44
 45
 46def get_microns_nuclei_paths(
 47    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool
 48) -> List[str]:
 49    """Get paths to the MICRONS Nucleus data.
 50
 51    Args:
 52        path: Filepath to a folder where the downloaded data will be saved.
 53        split: The split to use. One of 'train', 'val', 'test'.
 54        download: Whether to download the data if it is not present.
 55
 56    Returns:
 57        The filepaths to the stored data.
 58    """
 59    get_microns_nuclei_data(path, split, download)
 60    split_folder = os.path.join(path, split)
 61    paths = sorted(glob(os.path.join(split_folder, "*.h5")))
 62    return paths
 63
 64
 65def get_microns_nuclei_dataset(
 66    path: Union[os.PathLike, str],
 67    split: Literal["train", "val", "test"],
 68    patch_shape: Tuple[int, int, int],
 69    download: bool = False,
 70    **kwargs
 71) -> Dataset:
 72    """Get the MICRONS nucleus dataset for the segmentation of nuclei in EM.
 73
 74    Args:
 75        path: Filepath to a folder where the downloaded data will be saved.
 76        split: The split for the dataset, either 'train, 'val', or 'test'.
 77        patch_shape: The patch shape to use for training.
 78        download: Whether to download the data if it is not present.
 79        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 80
 81    Returns:
 82       The segmentation dataset.
 83    """
 84    paths = get_microns_nuclei_paths(path, split, download)
 85    return torch_em.default_segmentation_dataset(
 86        raw_paths=paths,
 87        raw_key="raw",
 88        label_paths=paths,
 89        label_key="labels/nuclei",
 90        patch_shape=patch_shape,
 91        is_seg_dataset=True,
 92        **kwargs
 93    )
 94
 95
 96def get_microns_nuclei_loader(
 97    path: Union[os.PathLike, str],
 98    split: Literal["train", "val", "test"],
 99    patch_shape: Tuple[int, int, int],
100    batch_size: int,
101    download: bool = False,
102    **kwargs
103) -> DataLoader:
104    """Get the MICRONS nucleus dataloader for the segmentation of nuclei in EM.
105
106    Args:
107        path: Filepath to a folder where the downloaded data will be saved.
108        split: The split for the dataset, either 'train', 'val', or 'test'.
109        patch_shape: The patch shape to use for training.
110        batch_size: The batch size for training.
111        download: Whether to download the data if it is not present.
112        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
113
114    Returns:
115       The segmentation dataset.
116    """
117    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
118    ds = get_microns_nuclei_dataset(path, split, patch_shape, download, **ds_kwargs)
119    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
URL = 'https://owncloud.gwdg.de/index.php/s/ToLGAzg1FAV4Sxf/download'
CHECKSUM = '36afcc963aea597faf991f6844537d2330739a89aa05c1a91fea31f2b4dc2de4'
def get_microns_nuclei_data( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool) -> str:
def get_microns_nuclei_data(
    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool
) -> str:
    """Download the MICRONS Nucleus data.

    The archive is only fetched and unpacked when the folder of the requested
    split does not exist yet below `path`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use. One of 'train', 'val', 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the folder of the requested split.

    Raises:
        ValueError: If `split` is not one of 'train', 'val', 'test'.
    """
    # Validate with an explicit exception: an `assert` is stripped under `python -O`
    # and would let an invalid split silently trigger a download.
    valid_splits = ("train", "val", "test")
    if split not in valid_splits:
        raise ValueError(f"Invalid split '{split}', expected one of {valid_splits}.")

    split_folder = os.path.join(path, split)
    if not os.path.exists(split_folder):
        os.makedirs(path, exist_ok=True)
        zip_path = os.path.join(path, "microns_nucleus_data.zip")
        util.download_source(zip_path, URL, download, CHECKSUM)
        # NOTE(review): `remove=True` presumably deletes the archive after extraction - confirm in `util.unzip`.
        util.unzip(zip_path, path, remove=True)
    return split_folder

Download the MICRONS Nucleus data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The split to use. One of 'train', 'val', 'test'.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the downloaded data.

def get_microns_nuclei_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool) -> List[str]:
def get_microns_nuclei_paths(
    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool
) -> List[str]:
    """Get paths to the MICRONS Nucleus data.

    Makes sure the data for the requested split is available and then lists
    all `*.h5` files in the folder of that split.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use. One of 'train', 'val', 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The sorted filepaths to the stored data.
    """
    # Fetch and unpack the data first if the split folder is missing.
    get_microns_nuclei_data(path, split, download)

    h5_pattern = os.path.join(path, split, "*.h5")
    h5_files = glob(h5_pattern)
    h5_files.sort()
    return h5_files

Get paths to the MICRONS Nucleus data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The split to use. One of 'train', 'val', 'test'.
  • download: Whether to download the data if it is not present.
Returns:

The filepaths to the stored data.

def get_microns_nuclei_dataset( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], patch_shape: Tuple[int, int, int], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_microns_nuclei_dataset(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"],
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the MICRONS nucleus dataset for the segmentation of nuclei in EM.

    Raw data and labels are read from the same files: the raw data from the
    key "raw" and the labels from the key "labels/nuclei".

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split for the dataset, either 'train', 'val', or 'test'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    volume_paths = get_microns_nuclei_paths(path, split, download)
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=volume_paths,
        raw_key="raw",
        label_paths=volume_paths,
        label_key="labels/nuclei",
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )
    return dataset

Get the MICRONS nucleus dataset for the segmentation of nuclei in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The split for the dataset, either 'train', 'val', or 'test'.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_microns_nuclei_loader( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], patch_shape: Tuple[int, int, int], batch_size: int, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
 97def get_microns_nuclei_loader(
 98    path: Union[os.PathLike, str],
 99    split: Literal["train", "val", "test"],
100    patch_shape: Tuple[int, int, int],
101    batch_size: int,
102    download: bool = False,
103    **kwargs
104) -> DataLoader:
105    """Get the MICRONS nucleus dataloader for the segmentation of nuclei in EM.
106
107    Args:
108        path: Filepath to a folder where the downloaded data will be saved.
109        split: The split for the dataset, either 'train', 'val', or 'test'.
110        patch_shape: The patch shape to use for training.
111        batch_size: The batch size for training.
112        download: Whether to download the data if it is not present.
113        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
114
115    Returns:
116       The segmentation dataset.
117    """
118    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
119    ds = get_microns_nuclei_dataset(path, split, patch_shape, download, **ds_kwargs)
120    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)

Get the MICRONS nucleus dataloader for the segmentation of nuclei in EM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The split for the dataset, either 'train', 'val', or 'test'.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.