torch_em.data.datasets.electron_microscopy.fafb_nuclei

This dataset contains crops of EM data with annotated nuclei from the adult fruit fly brain.

The data is extracted from https://doi.org/10.1101/2021.11.04.467197, which contains a segmentation of all nuclei in the fruit fly brain. Please cite it if you use this dataset for a publication.

View Source

  1"""This dataset contains crops of EM data with annotated nuclei from the adult fruit fly brain.
  2
  3The data is extracted from https://doi.org/10.1101/2021.11.04.467197, which contains a segmentation
  4of all nuclei in the fruit fly brain. Please cite it if you use this dataset for a publication.
  5"""
  6
  7import os
  8from glob import glob
  9from typing import Tuple, Union, Literal, List
 10
 11import torch_em
 12
 13from torch.utils.data import Dataset, DataLoader
 14
 15from .. import util
 16
 17
 18URL = "https://owncloud.gwdg.de/index.php/s/PDrkO02w7FWLrsh/download"
 19CHECKSUM = "7f245866fa8ffb473f7bda1fbbea23d265183e4ada5e34a36ecd60475809dca3"
 20
 21
 22def get_fafb_nuclei_data(path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool) -> str:
 23    """Download the FAFB Nucleus data.
 24
 25    Args:
 26        path: Filepath to a folder where the downloaded data will be saved.
 27        split: The split to use. One of 'train', 'val', 'test'.
 28        download: Whether to download the data if it is not present.
 29
 30    Returns:
 31        The filepath to the downloaded data.
 32    """
 33    assert split in ("train", "val", "test")
 34    split_folder = os.path.join(path, split)
 35    if not os.path.exists(split_folder):
 36        os.makedirs(path, exist_ok=True)
 37        zip_path = os.path.join(path, "fafb_nucleus_data.zip")
 38        util.download_source(zip_path, URL, download, CHECKSUM)
 39        util.unzip(zip_path, path, remove=True)
 40    return split_folder
 41
 42
 43def get_fafb_nuclei_paths(
 44    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool
 45) -> List[str]:
 46    """Get paths to the FAFB Nucleus data.
 47
 48    Args:
 49        path: Filepath to a folder where the downloaded data will be saved.
 50        split: The split to use. One of 'train', 'val', 'test'.
 51        download: Whether to download the data if it is not present.
 52
 53    Returns:
 54        The filepaths to the stored data.
 55    """
 56    get_fafb_nuclei_data(path, split, download)
 57    split_folder = os.path.join(path, split)
 58    paths = sorted(glob(os.path.join(split_folder, "*.h5")))
 59    return paths
 60
 61
 62def get_fafb_nuclei_dataset(
 63    path: Union[os.PathLike, str],
 64    split: Literal["train", "val", "test"],
 65    patch_shape: Tuple[int, int, int],
 66    download: bool = False,
 67    **kwargs
 68) -> Dataset:
 69    """Get the FAFB nucleus dataset for the segmentation of nuclei in EM.
 70
 71    Args:
 72        path: Filepath to a folder where the downloaded data will be saved.
 73        split: The split for the dataset, either 'train, 'val', or 'test'.
 74        patch_shape: The patch shape to use for training.
 75        download: Whether to download the data if it is not present.
 76        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 77
 78    Returns:
 79       The segmentation dataset.
 80    """
 81    paths = get_fafb_nuclei_paths(path, split, download)
 82    return torch_em.default_segmentation_dataset(
 83        raw_paths=paths,
 84        raw_key="raw",
 85        label_paths=paths,
 86        label_key="labels/nuclei",
 87        patch_shape=patch_shape,
 88        is_seg_dataset=True,
 89        **kwargs
 90    )
 91
 92
 93def get_fafb_nuclei_loader(
 94    path: Union[os.PathLike, str],
 95    split: Literal["train", "val", "test"],
 96    patch_shape: Tuple[int, int, int],
 97    batch_size: int,
 98    download: bool = False,
 99    **kwargs
100) -> DataLoader:
101    """Get the FAFB nucleus dataloader for the segmentation of nuclei in EM.
102
103    Args:
104        path: Filepath to a folder where the downloaded data will be saved.
105        split: The split for the dataset, either 'train', 'val', or 'test'.
106        patch_shape: The patch shape to use for training.
107        batch_size: The batch size for training.
108        download: Whether to download the data if it is not present.
109        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
110
111    Returns:
112       The segmentation dataset.
113    """
114    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
115    ds = get_fafb_nuclei_dataset(path, split, patch_shape, download, **ds_kwargs)
116    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)

URL = 'https://owncloud.gwdg.de/index.php/s/PDrkO02w7FWLrsh/download'

CHECKSUM = '7f245866fa8ffb473f7bda1fbbea23d265183e4ada5e34a36ecd60475809dca3'

def get_fafb_nuclei_data( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool) -> str: View Source

def get_fafb_nuclei_data(path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool) -> str:
    """Download the FAFB Nucleus data.

    The archive is only fetched and unpacked when the folder of the requested split
    does not exist yet below `path`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use. One of 'train', 'val', 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the folder of the requested split.

    Raises:
        ValueError: If `split` is not one of 'train', 'val', 'test'.
    """
    # Validate explicitly: an `assert` would be stripped when running with `python -O`.
    valid_splits = ("train", "val", "test")
    if split not in valid_splits:
        raise ValueError(f"Invalid split {split!r}, expected one of {valid_splits}.")

    split_folder = os.path.join(path, split)
    if not os.path.exists(split_folder):
        os.makedirs(path, exist_ok=True)
        zip_path = os.path.join(path, "fafb_nucleus_data.zip")
        util.download_source(zip_path, URL, download, CHECKSUM)
        util.unzip(zip_path, path, remove=True)
    return split_folder

Download the FAFB Nucleus data.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
split: The split to use. One of 'train', 'val', 'test'.
download: Whether to download the data if it is not present.

Returns:

The filepath to the downloaded data.

def get_fafb_nuclei_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool) -> List[str]: View Source

def get_fafb_nuclei_paths(
    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool
) -> List[str]:
    """Collect the HDF5 files of one split of the FAFB Nucleus data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use. One of 'train', 'val', 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The sorted filepaths of all `*.h5` files in the folder of the split.
    """
    # The data getter returns the folder of the split, i.e. `path/split`.
    folder = get_fafb_nuclei_data(path, split, download)
    pattern = os.path.join(folder, "*.h5")
    return sorted(glob(pattern))

Get paths to the FAFB Nucleus data.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
split: The split to use. One of 'train', 'val', 'test'.
download: Whether to download the data if it is not present.

Returns:

The filepaths to the stored data.

def get_fafb_nuclei_dataset( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], patch_shape: Tuple[int, int, int], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

def get_fafb_nuclei_dataset(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"],
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the FAFB nucleus dataset for the segmentation of nuclei in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split for the dataset, either 'train', 'val', or 'test'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # Raw data and labels are read from the same files, only the keys differ.
    volume_paths = get_fafb_nuclei_paths(path, split, download)
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=volume_paths, raw_key="raw",
        label_paths=volume_paths, label_key="labels/nuclei",
        patch_shape=patch_shape, is_seg_dataset=True,
        **kwargs
    )
    return dataset

Get the FAFB nucleus dataset for the segmentation of nuclei in EM.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
split: The split for the dataset, either 'train', 'val', or 'test'.
patch_shape: The patch shape to use for training.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_fafb_nuclei_loader( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], patch_shape: Tuple[int, int, int], batch_size: int, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

 94def get_fafb_nuclei_loader(
 95    path: Union[os.PathLike, str],
 96    split: Literal["train", "val", "test"],
 97    patch_shape: Tuple[int, int, int],
 98    batch_size: int,
 99    download: bool = False,
100    **kwargs
101) -> DataLoader:
102    """Get the FAFB nucleus dataloader for the segmentation of nuclei in EM.
103
104    Args:
105        path: Filepath to a folder where the downloaded data will be saved.
106        split: The split for the dataset, either 'train', 'val', or 'test'.
107        patch_shape: The patch shape to use for training.
108        batch_size: The batch size for training.
109        download: Whether to download the data if it is not present.
110        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
111
112    Returns:
113       The segmentation dataset.
114    """
115    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
116    ds = get_fafb_nuclei_dataset(path, split, patch_shape, download, **ds_kwargs)
117    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)

Get the FAFB nucleus dataloader for the segmentation of nuclei in EM.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
split: The split for the dataset, either 'train', 'val', or 'test'.
patch_shape: The patch shape to use for training.
batch_size: The batch size for training.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.