torch_em.data.datasets.light_microscopy.mouse_embryo

This dataset contains confocal microscopy stacks of a mouse embryo with annotations for cell and nucleus segmentation.

This dataset is part of the publication https://doi.org/10.15252/embj.2022113280. Please cite it if you use this data in your research.

View Source

  1"""This dataset contains confocal microscopy stacks of a mouse embryo
  2with annotations for cell and nucleus segmentation.
  3
  4This dataset is part of the publication https://doi.org/10.15252/embj.2022113280.
  5Please cite it if you use this data in your research.
  6"""
  7
  8import os
  9from glob import glob
 10from typing import List, Optional, Tuple, Union
 11
 12from torch.utils.data import Dataset, DataLoader
 13
 14import torch_em
 15
 16from .. import util
 17
 18
 19URL = "https://zenodo.org/record/6546550/files/MouseEmbryos.zip?download=1"
 20CHECKSUM = "bf24df25e5f919489ce9e674876ff27e06af84445c48cf2900f1ab590a042622"
 21
 22
 23def get_mouse_embryo_data(path: Union[os.PathLike, str], download: bool) -> str:
 24    """Download the mouse embryo dataset.
 25
 26    Args:
 27        path: Filepath to a folder where the downloaded data will be saved.
 28        download: Whether to download the data if it is not present.
 29
 30    Returns:
 31        The filepath for the downloaded data.
 32    """
 33    if os.path.exists(path):
 34        return path
 35    os.makedirs(path, exist_ok=True)
 36    tmp_path = os.path.join(path, "mouse_embryo.zip")
 37    util.download_source(tmp_path, URL, download, CHECKSUM)
 38    util.unzip(tmp_path, path, remove=True)
 39    # Remove empty volume.
 40    os.remove(os.path.join(path, "Membrane", "train", "fused_paral_stack0_chan2_tp00073_raw_crop_bg_noise.h5"))
 41    return path
 42
 43
 44def get_mouse_embryo_paths(path: Union[os.PathLike, str], name: str, split: str, download: bool = False) -> List[str]:
 45    """Get paths to the Mouse Embryo data.
 46
 47    Args:
 48        path: Filepath to a folder where the downloaded data will be saved.
 49        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
 50        split: The split to use for the dataset. Either 'train' or 'val'.
 51        download: Whether to download the data if it is not present.
 52
 53    Returns:
 54        List of filepaths for the stored data.
 55    """
 56    get_mouse_embryo_data(path, download)
 57
 58    # the naming of the data is inconsistent: membrane has val, nuclei has test;
 59    # we treat nuclei:test as val
 60    split_ = "test" if name == "nuclei" and split == "val" else split
 61    file_paths = glob(os.path.join(path, name.capitalize(), split_, "*.h5"))
 62    file_paths.sort()
 63
 64    return file_paths
 65
 66
 67def get_mouse_embryo_dataset(
 68    path: Union[os.PathLike, str],
 69    name: str,
 70    split: str,
 71    patch_shape: Tuple[int, int],
 72    download: bool = False,
 73    offsets: Optional[List[List[int]]] = None,
 74    boundaries: bool = False,
 75    binary: bool = False,
 76    **kwargs,
 77) -> Dataset:
 78    """Get the mouse embryo dataset for cell or nucleus segmentation.
 79
 80    Args:
 81        path: Filepath to a folder where the downloaded data will be saved.
 82        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
 83        split: The split to use for the dataset. Either 'train' or 'val'.
 84        patch_shape: The patch shape to use for training.
 85        download: Whether to download the data if it is not present.
 86        offsets: Offset values for affinity computation used as target.
 87        boundaries: Whether to compute boundaries as the target.
 88        binary: Whether to use a binary segmentation target.
 89        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 90
 91    Returns:
 92       The segmentation dataset.
 93    """
 94    assert name in ("membrane", "nuclei")
 95    assert split in ("train", "val")
 96    assert len(patch_shape) == 3
 97
 98    file_paths = get_mouse_embryo_paths(path, name, split, download)
 99
100    kwargs, _ = util.add_instance_label_transform(
101        kwargs,
102        add_binary_target=binary,
103        binary=binary,
104        boundaries=boundaries,
105        offsets=offsets,
106        binary_is_exclusive=False
107    )
108
109    return torch_em.default_segmentation_dataset(
110        raw_paths=file_paths,
111        raw_key="raw",
112        label_paths=file_paths,
113        label_key="label",
114        patch_shape=patch_shape,
115        **kwargs
116    )
117
118
def get_mouse_embryo_loader(
    path: Union[os.PathLike, str],
    name: str,
    split: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs,
) -> DataLoader:
    """Create a dataloader over the mouse embryo data for cell or nucleus segmentation.

    The extra keyword arguments are divided with `util.split_kwargs`: one part is handed
    to `get_mouse_embryo_dataset`, the remainder to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
        split: The split to use for the dataset. Either 'train' or 'val'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    embryo_dataset = get_mouse_embryo_dataset(
        path,
        name,
        split,
        patch_shape,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )

    return torch_em.get_data_loader(embryo_dataset, batch_size, **dataloader_kwargs)

URL = 'https://zenodo.org/record/6546550/files/MouseEmbryos.zip?download=1'

CHECKSUM = 'bf24df25e5f919489ce9e674876ff27e06af84445c48cf2900f1ab590a042622'

def get_mouse_embryo_data(path: Union[os.PathLike, str], download: bool) -> str: View Source

def get_mouse_embryo_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Make sure the mouse embryo data is available in the folder `path`.

    If `path` already exists it is returned immediately; its contents are not inspected.
    Otherwise the folder is created, the zip archive is obtained from `URL` via
    `util.download_source` (which also receives `CHECKSUM`), unpacked into `path`,
    and the one volume known to be empty is deleted.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath for the downloaded data.
    """
    if not os.path.exists(path):
        os.makedirs(path, exist_ok=True)

        # The archive is stored inside the target folder and removed again by the unzip step.
        archive_path = os.path.join(path, "mouse_embryo.zip")
        util.download_source(archive_path, URL, download, CHECKSUM)
        util.unzip(archive_path, path, remove=True)

        # One of the membrane training volumes is empty, so it is discarded right after extraction.
        empty_volume = os.path.join(
            path, "Membrane", "train", "fused_paral_stack0_chan2_tp00073_raw_crop_bg_noise.h5"
        )
        os.remove(empty_volume)

    return path

Download the mouse embryo dataset.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
download: Whether to download the data if it is not present.

Returns:

The filepath for the downloaded data.

def get_mouse_embryo_paths( path: Union[os.PathLike, str], name: str, split: str, download: bool = False) -> List[str]: View Source

def get_mouse_embryo_paths(path: Union[os.PathLike, str], name: str, split: str, download: bool = False) -> List[str]:
    """Collect the h5 files of one task and split of the Mouse Embryo data.

    The data is first made available through `get_mouse_embryo_data`. The files are then
    searched in `<path>/<Name>/<split>/*.h5`, where `<Name>` is `name` with its first
    letter capitalized.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
        split: The split to use for the dataset. Either 'train' or 'val'.
        download: Whether to download the data if it is not present.

    Returns:
        Sorted list of filepaths for the stored data.
    """
    get_mouse_embryo_data(path, download)

    # The folder naming in the archive is inconsistent: the membrane data has a 'val' folder,
    # while the nuclei data has a 'test' folder instead. The nuclei 'test' folder is used as 'val'.
    if name == "nuclei" and split == "val":
        split_folder = "test"
    else:
        split_folder = split

    pattern = os.path.join(path, name.capitalize(), split_folder, "*.h5")
    return sorted(glob(pattern))

Get paths to the Mouse Embryo data.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
split: The split to use for the dataset. Either 'train' or 'val'.
download: Whether to download the data if it is not present.

Returns:

List of filepaths for the stored data.

def get_mouse_embryo_dataset( path: Union[os.PathLike, str], name: str, split: str, patch_shape: Tuple[int, int, int], download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

 68def get_mouse_embryo_dataset(
 69    path: Union[os.PathLike, str],
 70    name: str,
 71    split: str,
 72    patch_shape: Tuple[int, int],
 73    download: bool = False,
 74    offsets: Optional[List[List[int]]] = None,
 75    boundaries: bool = False,
 76    binary: bool = False,
 77    **kwargs,
 78) -> Dataset:
 79    """Get the mouse embryo dataset for cell or nucleus segmentation.
 80
 81    Args:
 82        path: Filepath to a folder where the downloaded data will be saved.
 83        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
 84        split: The split to use for the dataset. Either 'train' or 'val'.
 85        patch_shape: The patch shape to use for training.
 86        download: Whether to download the data if it is not present.
 87        offsets: Offset values for affinity computation used as target.
 88        boundaries: Whether to compute boundaries as the target.
 89        binary: Whether to use a binary segmentation target.
 90        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 91
 92    Returns:
 93       The segmentation dataset.
 94    """
 95    assert name in ("membrane", "nuclei")
 96    assert split in ("train", "val")
 97    assert len(patch_shape) == 3
 98
 99    file_paths = get_mouse_embryo_paths(path, name, split, download)
100
101    kwargs, _ = util.add_instance_label_transform(
102        kwargs,
103        add_binary_target=binary,
104        binary=binary,
105        boundaries=boundaries,
106        offsets=offsets,
107        binary_is_exclusive=False
108    )
109
110    return torch_em.default_segmentation_dataset(
111        raw_paths=file_paths,
112        raw_key="raw",
113        label_paths=file_paths,
114        label_key="label",
115        patch_shape=patch_shape,
116        **kwargs
117    )

Get the mouse embryo dataset for cell or nucleus segmentation.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
split: The split to use for the dataset. Either 'train' or 'val'.
patch_shape: The patch shape to use for training.
download: Whether to download the data if it is not present.
offsets: Offset values for affinity computation used as target.
boundaries: Whether to compute boundaries as the target.
binary: Whether to use a binary segmentation target.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_mouse_embryo_loader( path: Union[os.PathLike, str], name: str, split: str, patch_shape: Tuple[int, int, int], batch_size: int, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_mouse_embryo_loader(
    path: Union[os.PathLike, str],
    name: str,
    split: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs,
) -> DataLoader:
    """Create a dataloader over the mouse embryo data for cell or nucleus segmentation.

    The extra keyword arguments are divided with `util.split_kwargs`: one part is handed
    to `get_mouse_embryo_dataset`, the remainder to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
        split: The split to use for the dataset. Either 'train' or 'val'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    embryo_dataset = get_mouse_embryo_dataset(
        path,
        name,
        split,
        patch_shape,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )

    return torch_em.get_data_loader(embryo_dataset, batch_size, **dataloader_kwargs)

Get the mouse embryo dataloader for cell or nucleus segmentation.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
split: The split to use for the dataset. Either 'train' or 'val'.
patch_shape: The patch shape to use for training.
batch_size: The batch size for training.
download: Whether to download the data if it is not present.
offsets: Offset values for affinity computation used as target.
boundaries: Whether to compute boundaries as the target.
binary: Whether to use a binary segmentation target.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.