torch_em.data.datasets.light_microscopy.mouse_embryo

This dataset contains confocal microscopy stacks of a mouse embryo with annotations for cell and nucleus segmentation.

This dataset is part of the publication https://doi.org/10.15252/embj.2022113280. Please cite it if you use this data in your research.

  1"""This dataset contains confocal microscopy stacks of a mouse embryo
  2with annotations for cell and nucleus segmentation.
  3
  4This dataset is part of the publication https://doi.org/10.15252/embj.2022113280.
  5Please cite it if you use this data in your research.
  6"""
  7
  8import os
  9from glob import glob
 10from typing import List, Optional, Tuple, Union
 11
 12import torch_em
 13from torch.utils.data import Dataset, DataLoader
 14from .. import util
 15
 16URL = "https://zenodo.org/record/6546550/files/MouseEmbryos.zip?download=1"
 17CHECKSUM = "bf24df25e5f919489ce9e674876ff27e06af84445c48cf2900f1ab590a042622"
 18
 19
 20def get_mouse_embryo_data(path: Union[os.PathLike, str], download: bool) -> str:
 21    """Download the mouse embryo dataset.
 22
 23    Args:
 24        path: Filepath to a folder where the downloaded data will be saved.
 25        download: Whether to download the data if it is not present.
 26
 27    Returns:
 28        The filepath for the downloaded data.
 29    """
 30    if os.path.exists(path):
 31        return path
 32    os.makedirs(path, exist_ok=True)
 33    tmp_path = os.path.join(path, "mouse_embryo.zip")
 34    util.download_source(tmp_path, URL, download, CHECKSUM)
 35    util.unzip(tmp_path, path, remove=True)
 36    # Remove empty volume.
 37    os.remove(os.path.join(path, "Membrane", "train", "fused_paral_stack0_chan2_tp00073_raw_crop_bg_noise.h5"))
 38    return path
 39
 40
 41def get_mouse_embryo_dataset(
 42    path: Union[os.PathLike, str],
 43    name: str,
 44    split: str,
 45    patch_shape: Tuple[int, int],
 46    download: bool = False,
 47    offsets: Optional[List[List[int]]] = None,
 48    boundaries: bool = False,
 49    binary: bool = False,
 50    **kwargs,
 51) -> Dataset:
 52    """Get the mouse embryo dataset for cell or nucleus segmentation.
 53
 54    Args:
 55        path: Filepath to a folder where the downloaded data will be saved.
 56        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
 57        split: The split to use for the dataset. Either 'train' or 'val'.
 58        patch_shape: The patch shape to use for training.
 59        download: Whether to download the data if it is not present.
 60        offsets: Offset values for affinity computation used as target.
 61        boundaries: Whether to compute boundaries as the target.
 62        binary: Whether to use a binary segmentation target.
 63        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 64
 65    Returns:
 66       The segmentation dataset.
 67    """
 68    assert name in ("membrane", "nuclei")
 69    assert split in ("train", "val")
 70    assert len(patch_shape) == 3
 71    get_mouse_embryo_data(path, download)
 72
 73    # the naming of the data is inconsistent: membrane has val, nuclei has test;
 74    # we treat nuclei:test as val
 75    split_ = "test" if name == "nuclei" and split == "val" else split
 76    file_paths = glob(os.path.join(path, name.capitalize(), split_, "*.h5"))
 77    file_paths.sort()
 78
 79    kwargs, _ = util.add_instance_label_transform(
 80        kwargs, add_binary_target=binary, binary=binary, boundaries=boundaries,
 81        offsets=offsets, binary_is_exclusive=False
 82    )
 83
 84    raw_key, label_key = "raw", "label"
 85    return torch_em.default_segmentation_dataset(file_paths, raw_key, file_paths, label_key, patch_shape, **kwargs)
 86
 87
 88def get_mouse_embryo_loader(
 89    path: Union[os.PathLike, str],
 90    name: str,
 91    split: str,
 92    patch_shape: Tuple[int, int, int],
 93    batch_size: int,
 94    download: bool = False,
 95    offsets: Optional[List[List[int]]] = None,
 96    boundaries: bool = False,
 97    binary: bool = False,
 98    **kwargs,
 99) -> DataLoader:
100    """Get the mouse embryo dataset for cell or nucleus segmentation.
101
102    Args:
103        path: Filepath to a folder where the downloaded data will be saved.
104        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
105        split: The split to use for the dataset. Either 'train' or 'val'.
106        patch_shape: The patch shape to use for training.
107        batch_size: The batch size for training.
108        download: Whether to download the data if it is not present.
109        offsets: Offset values for affinity computation used as target.
110        boundaries: Whether to compute boundaries as the target.
111        binary: Whether to use a binary segmentation target.
112        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
113
114    Returns:
115        The DataLoader.
116    """
117    ds_kwargs, loader_kwargs = util.split_kwargs(
118        torch_em.default_segmentation_dataset, **kwargs
119    )
120    dataset = get_mouse_embryo_dataset(
121        path, name, split, patch_shape,
122        download=download, offsets=offsets, boundaries=boundaries, binary=binary,
123        **ds_kwargs
124    )
125    loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
126    return loader
# Download location of the zipped dataset; handed to `util.download_source` by `get_mouse_embryo_data`.
URL = 'https://zenodo.org/record/6546550/files/MouseEmbryos.zip?download=1'
# Expected checksum of the zip, handed to `util.download_source` together with URL
# (64 hex digits, presumably SHA-256 - TODO confirm).
CHECKSUM = 'bf24df25e5f919489ce9e674876ff27e06af84445c48cf2900f1ab590a042622'
def get_mouse_embryo_data(path: Union[os.PathLike, str], download: bool) -> str:
def get_mouse_embryo_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the mouse embryo dataset.

    The data is treated as already available when at least one of the extracted
    top-level folders ('Membrane' or 'Nuclei') is found in `path`. Otherwise the
    zip archive is downloaded into `path`, extracted there, and the archive is removed.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath for the downloaded data.
    """
    # Look for the extracted folders instead of `path` itself: `path` is created below
    # before the download starts, so it also exists after a failed or disabled download
    # and must not be mistaken for a complete dataset on the next call.
    if any(os.path.isdir(os.path.join(path, folder)) for folder in ("Membrane", "Nuclei")):
        return path

    os.makedirs(path, exist_ok=True)
    tmp_path = os.path.join(path, "mouse_embryo.zip")
    util.download_source(tmp_path, URL, download, CHECKSUM)
    util.unzip(tmp_path, path, remove=True)

    # Remove empty volume.
    os.remove(os.path.join(path, "Membrane", "train", "fused_paral_stack0_chan2_tp00073_raw_crop_bg_noise.h5"))
    return path

Download the mouse embryo dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath for the downloaded data.

def get_mouse_embryo_dataset( path: Union[os.PathLike, str], name: str, split: str, patch_shape: Tuple[int, int], download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_mouse_embryo_dataset(
    path: Union[os.PathLike, str],
    name: str,
    split: str,
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs,
) -> Dataset:
    """Get the mouse embryo dataset for cell or nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
        split: The split to use for the dataset. Either 'train' or 'val'.
        patch_shape: The patch shape to use for training. Must have three entries.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
       The segmentation dataset.

    Raises:
        AssertionError: If `name` or `split` is not one of the supported values,
            or if `patch_shape` does not have exactly three entries.
    """
    # Validate the arguments before touching the file system or the network.
    assert name in ("membrane", "nuclei"), f"Invalid name '{name}', expected 'membrane' or 'nuclei'."
    assert split in ("train", "val"), f"Invalid split '{split}', expected 'train' or 'val'."
    assert len(patch_shape) == 3, f"Expected a patch shape with 3 entries, got {len(patch_shape)}."
    get_mouse_embryo_data(path, download)

    # the naming of the data is inconsistent: membrane has val, nuclei has test;
    # we treat nuclei:test as val
    split_ = "test" if name == "nuclei" and split == "val" else split
    # The folders on disc are capitalized ('Membrane' / 'Nuclei'); sort for a reproducible file order.
    file_paths = glob(os.path.join(path, name.capitalize(), split_, "*.h5"))
    file_paths.sort()

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=binary, binary=binary, boundaries=boundaries,
        offsets=offsets, binary_is_exclusive=False
    )

    # Raw data and labels are read from the same files, under the keys 'raw' and 'label'.
    raw_key, label_key = "raw", "label"
    return torch_em.default_segmentation_dataset(file_paths, raw_key, file_paths, label_key, patch_shape, **kwargs)

Get the mouse embryo dataset for cell or nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
  • split: The split to use for the dataset. Either 'train' or 'val'.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_mouse_embryo_loader( path: Union[os.PathLike, str], name: str, split: str, patch_shape: Tuple[int, int, int], batch_size: int, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
 89def get_mouse_embryo_loader(
 90    path: Union[os.PathLike, str],
 91    name: str,
 92    split: str,
 93    patch_shape: Tuple[int, int, int],
 94    batch_size: int,
 95    download: bool = False,
 96    offsets: Optional[List[List[int]]] = None,
 97    boundaries: bool = False,
 98    binary: bool = False,
 99    **kwargs,
100) -> DataLoader:
101    """Get the mouse embryo dataset for cell or nucleus segmentation.
102
103    Args:
104        path: Filepath to a folder where the downloaded data will be saved.
105        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
106        split: The split to use for the dataset. Either 'train' or 'val'.
107        patch_shape: The patch shape to use for training.
108        batch_size: The batch size for training.
109        download: Whether to download the data if it is not present.
110        offsets: Offset values for affinity computation used as target.
111        boundaries: Whether to compute boundaries as the target.
112        binary: Whether to use a binary segmentation target.
113        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
114
115    Returns:
116        The DataLoader.
117    """
118    ds_kwargs, loader_kwargs = util.split_kwargs(
119        torch_em.default_segmentation_dataset, **kwargs
120    )
121    dataset = get_mouse_embryo_dataset(
122        path, name, split, patch_shape,
123        download=download, offsets=offsets, boundaries=boundaries, binary=binary,
124        **ds_kwargs
125    )
126    loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
127    return loader

Get the mouse embryo dataloader for cell or nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
  • split: The split to use for the dataset. Either 'train' or 'val'.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.