torch_em.data.datasets.light_microscopy.mouse_embryo
This dataset contains confocal microscopy stacks of a mouse embryo with annotations for cell and nucleus segmentation.
This dataset is part of the publication https://doi.org/10.15252/embj.2022113280. Please cite it if you use this data in your research.
"""This dataset contains confocal microscopy stacks of a mouse embryo
with annotations for cell and nucleus segmentation.

This dataset is part of the publication https://doi.org/10.15252/embj.2022113280.
Please cite it if you use this data in your research.
"""

import os
from glob import glob
from typing import List, Optional, Tuple, Union

import torch_em
from torch.utils.data import Dataset, DataLoader
from .. import util

URL = "https://zenodo.org/record/6546550/files/MouseEmbryos.zip?download=1"
CHECKSUM = "bf24df25e5f919489ce9e674876ff27e06af84445c48cf2900f1ab590a042622"


def get_mouse_embryo_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the mouse embryo dataset.

    The data is treated as present only if the extracted `Membrane` and `Nuclei`
    folders exist inside `path`. Checking just for `path` itself is not enough:
    the folder is created before the download starts, so a failed download would
    otherwise leave an empty folder that is mistaken for a complete dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath for the downloaded data.
    """
    extracted_folders = [os.path.join(path, task) for task in ("Membrane", "Nuclei")]
    if all(os.path.isdir(folder) for folder in extracted_folders):
        return path

    os.makedirs(path, exist_ok=True)
    tmp_path = os.path.join(path, "mouse_embryo.zip")
    util.download_source(tmp_path, URL, download, CHECKSUM)
    util.unzip(tmp_path, path, remove=True)

    # Remove empty volume.
    empty_volume = os.path.join(
        path, "Membrane", "train", "fused_paral_stack0_chan2_tp00073_raw_crop_bg_noise.h5"
    )
    if os.path.exists(empty_volume):
        os.remove(empty_volume)
    return path


def get_mouse_embryo_dataset(
    path: Union[os.PathLike, str],
    name: str,
    split: str,
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs,
) -> Dataset:
    """Get the mouse embryo dataset for cell or nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
        split: The split to use for the dataset. Either 'train' or 'val'.
        patch_shape: The patch shape to use for training. Must have three entries.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert name in ("membrane", "nuclei"), f"Invalid name {name!r}, expected 'membrane' or 'nuclei'."
    assert split in ("train", "val"), f"Invalid split {split!r}, expected 'train' or 'val'."
    assert len(patch_shape) == 3, f"Expected a 3D patch shape, got {patch_shape!r}."
    get_mouse_embryo_data(path, download)

    # the naming of the data is inconsistent: membrane has val, nuclei has test;
    # we treat nuclei:test as val
    split_ = "test" if name == "nuclei" and split == "val" else split
    file_paths = sorted(glob(os.path.join(path, name.capitalize(), split_, "*.h5")))

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=binary, binary=binary, boundaries=boundaries,
        offsets=offsets, binary_is_exclusive=False
    )

    raw_key, label_key = "raw", "label"
    return torch_em.default_segmentation_dataset(file_paths, raw_key, file_paths, label_key, patch_shape, **kwargs)


def get_mouse_embryo_loader(
    path: Union[os.PathLike, str],
    name: str,
    split: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the mouse embryo dataloader for cell or nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
        split: The split to use for the dataset. Either 'train' or 'val'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Keyword arguments accepted by the dataset factory go to the dataset,
    # everything else is forwarded to the data loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    dataset = get_mouse_embryo_dataset(
        path, name, split, patch_shape,
        download=download, offsets=offsets, boundaries=boundaries, binary=binary,
        **ds_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL = 'https://zenodo.org/record/6546550/files/MouseEmbryos.zip?download=1'
CHECKSUM = 'bf24df25e5f919489ce9e674876ff27e06af84445c48cf2900f1ab590a042622'
def get_mouse_embryo_data(path: Union[os.PathLike, str], download: bool) -> str:
def get_mouse_embryo_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the mouse embryo dataset.

    The data is treated as present only if the extracted `Membrane` and `Nuclei`
    folders exist inside `path`. Checking just for `path` itself is not enough:
    the folder is created before the download starts, so a failed download would
    otherwise leave an empty folder that is mistaken for a complete dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath for the downloaded data.
    """
    extracted_folders = [os.path.join(path, task) for task in ("Membrane", "Nuclei")]
    if all(os.path.isdir(folder) for folder in extracted_folders):
        return path

    os.makedirs(path, exist_ok=True)
    tmp_path = os.path.join(path, "mouse_embryo.zip")
    util.download_source(tmp_path, URL, download, CHECKSUM)
    util.unzip(tmp_path, path, remove=True)

    # Remove empty volume.
    empty_volume = os.path.join(
        path, "Membrane", "train", "fused_paral_stack0_chan2_tp00073_raw_crop_bg_noise.h5"
    )
    if os.path.exists(empty_volume):
        os.remove(empty_volume)
    return path
Download the mouse embryo dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath for the downloaded data.
def get_mouse_embryo_dataset(path: Union[os.PathLike, str], name: str, split: str, patch_shape: Tuple[int, int], download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_mouse_embryo_dataset(
    path: Union[os.PathLike, str],
    name: str,
    split: str,
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs,
) -> Dataset:
    """Get the mouse embryo dataset for cell or nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
        split: The split to use for the dataset. Either 'train' or 'val'.
        patch_shape: The patch shape to use for training. Must have three entries.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert name in ("membrane", "nuclei"), f"Invalid name {name!r}, expected 'membrane' or 'nuclei'."
    assert split in ("train", "val"), f"Invalid split {split!r}, expected 'train' or 'val'."
    assert len(patch_shape) == 3, f"Expected a 3D patch shape, got {patch_shape!r}."
    get_mouse_embryo_data(path, download)

    # the naming of the data is inconsistent: membrane has val, nuclei has test;
    # we treat nuclei:test as val
    split_ = "test" if name == "nuclei" and split == "val" else split
    file_paths = sorted(glob(os.path.join(path, name.capitalize(), split_, "*.h5")))

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=binary, binary=binary, boundaries=boundaries,
        offsets=offsets, binary_is_exclusive=False
    )

    raw_key, label_key = "raw", "label"
    return torch_em.default_segmentation_dataset(file_paths, raw_key, file_paths, label_key, patch_shape, **kwargs)
Get the mouse embryo dataset for cell or nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
- split: The split to use for the dataset. Either 'train' or 'val'.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_mouse_embryo_loader(path: Union[os.PathLike, str], name: str, split: str, patch_shape: Tuple[int, int, int], batch_size: int, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_mouse_embryo_loader(
    path: Union[os.PathLike, str],
    name: str,
    split: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the mouse embryo dataloader for cell or nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
        split: The split to use for the dataset. Either 'train' or 'val'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Keyword arguments accepted by the dataset factory go to the dataset,
    # everything else is forwarded to the data loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    segmentation_dataset = get_mouse_embryo_dataset(
        path,
        name,
        split,
        patch_shape,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(segmentation_dataset, batch_size, **dataloader_kwargs)
Get the mouse embryo dataloader for cell or nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
- split: The split to use for the dataset. Either 'train' or 'val'.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.