torch_em.data.datasets.electron_microscopy.microns_nuclei
This dataset contains crops of EM data with annotated nuclei from mouse cortex.
The data is extracted from https://doi.org/10.1038/s41586-025-08790-w, which contains a segmentation of all nuclei in the cubic millimeter of mouse cortex imaged as part of the MICrONS project. Please cite it if you use this dataset for a publication.
1"""This dataset contains crops of EM data with annotated nuclei from mouse cortex. 2 3The data is extracted from https://doi.org/10.1038/s41586-025-08790-w, which contains a segmentation 4of all nuclei in the cubic millimeter of mouse cortex imaged as part of cortex. 5Please cite it if you use this dataset for a publication. 6""" 7 8import os 9from glob import glob 10from typing import Tuple, Union, Literal, List 11 12import torch_em 13 14from torch.utils.data import Dataset, DataLoader 15 16from .. import util 17 18 19URL = "https://owncloud.gwdg.de/index.php/s/ToLGAzg1FAV4Sxf/download" 20CHECKSUM = "36afcc963aea597faf991f6844537d2330739a89aa05c1a91fea31f2b4dc2de4" 21 22 23def get_microns_nuclei_data( 24 path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool 25) -> str: 26 """Download the MICRONS Nucleus data. 27 28 Args: 29 path: Filepath to a folder where the downloaded data will be saved. 30 split: The split to use. One of 'train', 'val', 'test'. 31 download: Whether to download the data if it is not present. 32 33 Returns: 34 The filepath to the downloaded data. 35 """ 36 assert split in ("train", "val", "test") 37 split_folder = os.path.join(path, split) 38 if not os.path.exists(split_folder): 39 os.makedirs(path, exist_ok=True) 40 zip_path = os.path.join(path, "microns_nucleus_data.zip") 41 util.download_source(zip_path, URL, download, CHECKSUM) 42 util.unzip(zip_path, path, remove=True) 43 return split_folder 44 45 46def get_microns_nuclei_paths( 47 path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool 48) -> List[str]: 49 """Get paths to the MICRONS Nucleus data. 50 51 Args: 52 path: Filepath to a folder where the downloaded data will be saved. 53 split: The split to use. One of 'train', 'val', 'test'. 54 download: Whether to download the data if it is not present. 55 56 Returns: 57 The filepaths to the stored data. 
58 """ 59 get_microns_nuclei_data(path, split, download) 60 split_folder = os.path.join(path, split) 61 paths = sorted(glob(os.path.join(split_folder, "*.h5"))) 62 return paths 63 64 65def get_microns_nuclei_dataset( 66 path: Union[os.PathLike, str], 67 split: Literal["train", "val", "test"], 68 patch_shape: Tuple[int, int, int], 69 download: bool = False, 70 **kwargs 71) -> Dataset: 72 """Get the MICRONS nucleus dataset for the segmentation of nuclei in EM. 73 74 Args: 75 path: Filepath to a folder where the downloaded data will be saved. 76 split: The split for the dataset, either 'train, 'val', or 'test'. 77 patch_shape: The patch shape to use for training. 78 download: Whether to download the data if it is not present. 79 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 80 81 Returns: 82 The segmentation dataset. 83 """ 84 paths = get_microns_nuclei_paths(path, split, download) 85 return torch_em.default_segmentation_dataset( 86 raw_paths=paths, 87 raw_key="raw", 88 label_paths=paths, 89 label_key="labels/nuclei", 90 patch_shape=patch_shape, 91 is_seg_dataset=True, 92 **kwargs 93 ) 94 95 96def get_microns_nuclei_loader( 97 path: Union[os.PathLike, str], 98 split: Literal["train", "val", "test"], 99 patch_shape: Tuple[int, int, int], 100 batch_size: int, 101 download: bool = False, 102 **kwargs 103) -> DataLoader: 104 """Get the MICRONS nucleus dataloader for the segmentation of nuclei in EM. 105 106 Args: 107 path: Filepath to a folder where the downloaded data will be saved. 108 split: The split for the dataset, either 'train', 'val', or 'test'. 109 patch_shape: The patch shape to use for training. 110 batch_size: The batch size for training. 111 download: Whether to download the data if it is not present. 112 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 113 114 Returns: 115 The segmentation dataset. 
116 """ 117 ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) 118 ds = get_microns_nuclei_dataset(path, split, patch_shape, download, **ds_kwargs) 119 return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
URL =
'https://owncloud.gwdg.de/index.php/s/ToLGAzg1FAV4Sxf/download'
CHECKSUM =
'36afcc963aea597faf991f6844537d2330739a89aa05c1a91fea31f2b4dc2de4'
def
get_microns_nuclei_data( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool) -> str:
def get_microns_nuclei_data(
    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool
) -> str:
    """Make sure the MICRONS nucleus data for one split is available locally.

    If the folder for the requested split does not exist yet, the zip archive is
    fetched with `util.download_source` and extracted into `path` with `util.unzip`.

    Args:
        path: Filepath to the folder where the data is (or will be) stored.
        split: Which split to use, one of 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the folder of the requested split.
    """
    assert split in ("train", "val", "test")
    target_dir = os.path.join(path, split)

    # Nothing to do when the split has already been extracted.
    if os.path.exists(target_dir):
        return target_dir

    os.makedirs(path, exist_ok=True)
    archive = os.path.join(path, "microns_nucleus_data.zip")
    util.download_source(archive, URL, download, CHECKSUM)
    util.unzip(archive, path, remove=True)
    return target_dir
Download the MICRONS Nucleus data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split to use. One of 'train', 'val', 'test'.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the downloaded data.
def
get_microns_nuclei_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool) -> List[str]:
def get_microns_nuclei_paths(
    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool
) -> List[str]:
    """Collect the HDF5 files of one split of the MICRONS nucleus data.

    Args:
        path: Filepath to the folder where the data is (or will be) stored.
        split: Which split to use, one of 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The sorted filepaths of all '*.h5' files in the split folder.
    """
    # The download helper returns the split folder, i.e. os.path.join(path, split).
    split_folder = get_microns_nuclei_data(path, split, download)
    pattern = os.path.join(split_folder, "*.h5")
    return sorted(glob(pattern))
Get paths to the MICRONS Nucleus data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split to use. One of 'train', 'val', 'test'.
- download: Whether to download the data if it is not present.
Returns:
The filepaths to the stored data.
def
get_microns_nuclei_dataset( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], patch_shape: Tuple[int, int, int], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_microns_nuclei_dataset(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"],
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the MICRONS nucleus dataset for the segmentation of nuclei in EM.

    Args:
        path: Filepath to the folder where the data is (or will be) stored.
        split: The split for the dataset, either 'train', 'val', or 'test'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    h5_files = get_microns_nuclei_paths(path, split, download)

    # Raw data and labels are read from the same files, under different keys.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=h5_files,
        raw_key="raw",
        label_paths=h5_files,
        label_key="labels/nuclei",
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )
    return dataset
Get the MICRONS nucleus dataset for the segmentation of nuclei in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split for the dataset, either 'train', 'val', or 'test'.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
def
get_microns_nuclei_loader( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], patch_shape: Tuple[int, int, int], batch_size: int, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_microns_nuclei_loader(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the MICRONS nucleus dataloader for the segmentation of nuclei in EM.

    Args:
        path: Filepath to the folder where the data is (or will be) stored.
        split: The split for the dataset, either 'train', 'val', or 'test'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_microns_nuclei_dataset(path, split, patch_shape, download, **dataset_kwargs)
    loader = torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader
Get the MICRONS nucleus dataloader for the segmentation of nuclei in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split for the dataset, either 'train', 'val', or 'test'.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.