torch_em.data.datasets.light_microscopy.mouse_embryo
This dataset contains confocal microscopy stacks of a mouse embryo with annotations for cell and nucleus segmentation.
This dataset is part of the publication https://doi.org/10.15252/embj.2022113280. Please cite it if you use this data in your research.
"""This dataset contains confocal microscopy stacks of a mouse embryo
with annotations for cell and nucleus segmentation.

This dataset is part of the publication https://doi.org/10.15252/embj.2022113280.
Please cite it if you use this data in your research.
"""

import os
from glob import glob
from typing import List, Optional, Tuple, Union

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://zenodo.org/record/6546550/files/MouseEmbryos.zip?download=1"
CHECKSUM = "bf24df25e5f919489ce9e674876ff27e06af84445c48cf2900f1ab590a042622"


def get_mouse_embryo_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the mouse embryo dataset.

    The data is treated as present once one of the task folders ('Membrane' or 'Nuclei')
    exists below `path`. The mere existence of `path` is not used as the marker, because
    this function creates that folder before downloading: a failed download would
    otherwise be mistaken for a complete dataset on every later call.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath for the downloaded data.
    """
    task_folders = [os.path.join(path, task) for task in ("Membrane", "Nuclei")]
    if any(os.path.isdir(folder) for folder in task_folders):
        return path

    os.makedirs(path, exist_ok=True)
    tmp_path = os.path.join(path, "mouse_embryo.zip")
    util.download_source(tmp_path, URL, download, CHECKSUM)
    util.unzip(tmp_path, path, remove=True)

    # Remove empty volume. Guarded so that a missing file does not abort the setup.
    empty_volume = os.path.join(path, "Membrane", "train", "fused_paral_stack0_chan2_tp00073_raw_crop_bg_noise.h5")
    if os.path.exists(empty_volume):
        os.remove(empty_volume)
    return path


def get_mouse_embryo_paths(path: Union[os.PathLike, str], name: str, split: str, download: bool = False) -> List[str]:
    """Get paths to the Mouse Embryo data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
        split: The split to use for the dataset. Either 'train' or 'val'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the stored data, in sorted order.
    """
    get_mouse_embryo_data(path, download)

    # the naming of the data is inconsistent: membrane has val, nuclei has test;
    # we treat nuclei:test as val
    if name == "nuclei" and split == "val":
        split_folder = "test"
    else:
        split_folder = split

    pattern = os.path.join(path, name.capitalize(), split_folder, "*.h5")
    return sorted(glob(pattern))


def get_mouse_embryo_dataset(
    path: Union[os.PathLike, str],
    name: str,
    split: str,
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs,
) -> Dataset:
    """Get the mouse embryo dataset for cell or nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
        split: The split to use for the dataset. Either 'train' or 'val'.
        patch_shape: The patch shape to use for training. Must have exactly three entries.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        AssertionError: If `name` or `split` is not one of the supported values,
            or if `patch_shape` does not have three entries.
    """
    assert name in ("membrane", "nuclei"), f"Invalid name '{name}', expected 'membrane' or 'nuclei'."
    assert split in ("train", "val"), f"Invalid split '{split}', expected 'train' or 'val'."
    assert len(patch_shape) == 3, f"Expected a 3D patch shape, got {patch_shape}."

    file_paths = get_mouse_embryo_paths(path, name, split, download)

    kwargs, _ = util.add_instance_label_transform(
        kwargs,
        add_binary_target=binary,
        binary=binary,
        boundaries=boundaries,
        offsets=offsets,
        binary_is_exclusive=False
    )

    # Raw data and labels are read from the same files, under the keys "raw" and "label".
    return torch_em.default_segmentation_dataset(
        raw_paths=file_paths,
        raw_key="raw",
        label_paths=file_paths,
        label_key="label",
        patch_shape=patch_shape,
        **kwargs
    )


def get_mouse_embryo_loader(
    path: Union[os.PathLike, str],
    name: str,
    split: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the mouse embryo dataloader for cell or nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
        split: The split to use for the dataset. Either 'train' or 'val'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_mouse_embryo_dataset(
        path, name, split, patch_shape, download=download, offsets=offsets,
        boundaries=boundaries, binary=binary, **ds_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://zenodo.org/record/6546550/files/MouseEmbryos.zip?download=1'
CHECKSUM =
'bf24df25e5f919489ce9e674876ff27e06af84445c48cf2900f1ab590a042622'
def
get_mouse_embryo_data(path: Union[os.PathLike, str], download: bool) -> str:
def get_mouse_embryo_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the mouse embryo dataset.

    The data is treated as present once one of the task folders ('Membrane' or 'Nuclei')
    exists below `path`. The mere existence of `path` is not used as the marker, because
    this function creates that folder before downloading: a failed download would
    otherwise be mistaken for a complete dataset on every later call.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath for the downloaded data.
    """
    task_folders = [os.path.join(path, task) for task in ("Membrane", "Nuclei")]
    if any(os.path.isdir(folder) for folder in task_folders):
        return path

    os.makedirs(path, exist_ok=True)
    tmp_path = os.path.join(path, "mouse_embryo.zip")
    util.download_source(tmp_path, URL, download, CHECKSUM)
    util.unzip(tmp_path, path, remove=True)

    # Remove empty volume. Guarded so that a missing file does not abort the setup.
    empty_volume = os.path.join(path, "Membrane", "train", "fused_paral_stack0_chan2_tp00073_raw_crop_bg_noise.h5")
    if os.path.exists(empty_volume):
        os.remove(empty_volume)
    return path
Download the mouse embryo dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath for the downloaded data.
def
get_mouse_embryo_paths( path: Union[os.PathLike, str], name: str, split: str, download: bool = False) -> List[str]:
def get_mouse_embryo_paths(path: Union[os.PathLike, str], name: str, split: str, download: bool = False) -> List[str]:
    """Collect the HDF5 files that belong to one task and split of the Mouse Embryo data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
        split: The split to use for the dataset. Either 'train' or 'val'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the stored data, in sorted order.
    """
    get_mouse_embryo_data(path, download)

    # The folder names are inconsistent between the tasks: membrane has a 'val' folder,
    # nuclei has a 'test' folder instead. The nuclei 'test' folder is used as 'val'.
    if name == "nuclei" and split == "val":
        split_folder = "test"
    else:
        split_folder = split

    pattern = os.path.join(path, name.capitalize(), split_folder, "*.h5")
    return sorted(glob(pattern))
Get paths to the Mouse Embryo data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
- split: The split to use for the dataset. Either 'train' or 'val'.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the stored data.
def
get_mouse_embryo_dataset( path: Union[os.PathLike, str], name: str, split: str, patch_shape: Tuple[int, int], download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_mouse_embryo_dataset(
    path: Union[os.PathLike, str],
    name: str,
    split: str,
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs,
) -> Dataset:
    """Get the mouse embryo dataset for cell or nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
        split: The split to use for the dataset. Either 'train' or 'val'.
        patch_shape: The patch shape to use for training. Must have exactly three entries.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        AssertionError: If `name` or `split` is not one of the supported values,
            or if `patch_shape` does not have three entries.
    """
    assert name in ("membrane", "nuclei"), f"Invalid name '{name}', expected 'membrane' or 'nuclei'."
    assert split in ("train", "val"), f"Invalid split '{split}', expected 'train' or 'val'."
    assert len(patch_shape) == 3, f"Expected a 3D patch shape, got {patch_shape}."

    file_paths = get_mouse_embryo_paths(path, name, split, download)

    kwargs, _ = util.add_instance_label_transform(
        kwargs,
        add_binary_target=binary,
        binary=binary,
        boundaries=boundaries,
        offsets=offsets,
        binary_is_exclusive=False
    )

    # Raw data and labels are read from the same files, under the keys "raw" and "label".
    return torch_em.default_segmentation_dataset(
        raw_paths=file_paths,
        raw_key="raw",
        label_paths=file_paths,
        label_key="label",
        patch_shape=patch_shape,
        **kwargs
    )
Get the mouse embryo dataset for cell or nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
- split: The split to use for the dataset. Either 'train' or 'val'.
- patch_shape: The patch shape to use for training. Must have exactly three entries.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_mouse_embryo_loader( path: Union[os.PathLike, str], name: str, split: str, patch_shape: Tuple[int, int, int], batch_size: int, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_mouse_embryo_loader(
    path: Union[os.PathLike, str],
    name: str,
    split: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the mouse embryo dataloader for cell or nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
        split: The split to use for the dataset. Either 'train' or 'val'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_mouse_embryo_dataset(
        path,
        name,
        split,
        patch_shape,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs
    )

    loader = torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)
    return loader
Get the mouse embryo dataloader for cell or nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- name: The name of the segmentation task. Either 'membrane' or 'nuclei'.
- split: The split to use for the dataset. Either 'train' or 'val'.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.