torch_em.data.datasets.light_microscopy.lsm_mouse_embryo
The LSM Mouse Embryo dataset contains annotations for tissue and cell segmentation in light-sheet microscopy images of mouse embryos.
NOTE: The dataset only has semantic segmentation.
The dataset is from the publication https://doi.org/10.1109/ACCESS.2022.3210542. Please cite it if you use this dataset in your research.
"""The LSM Mouse Embryo dataset contains annotations for tissue and cell segmentation
in light-sheet microscopy images of mouse embryos.

NOTE: The dataset only has semantic segmentation.

The dataset is from the publication https://doi.org/10.1109/ACCESS.2022.3210542.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Literal, Tuple, List

import numpy as np
import imageio.v3 as imageio

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://www.dropbox.com/s/7zkk4j415ncfs47/LSM_Segmentation_Dataset.zip?dl=1"
CHECKSUM = None

# Maps each task name to its top-level folder and to the sub-folder holding its masks.
TASKS = {
    "tissue": {"dir": "DAPI-Tissue", "mask_dir": "Mask"},
    "cells": {"dir": "DAPI-Cells", "mask_dir": "Mesen_Mask"},
    "proliferating_cells": {"dir": "PHH3-Cells", "mask_dir": "Mask"},
}

TASK_NAMES = list(TASKS.keys())
SPLITS = ["Training", "Validation", "Test"]
# Maps the split names accepted by the public functions to the folder names on disk.
_SPLIT_MAPPING = {"train": "Training", "val": "Validation", "test": "Test"}


def _preprocess_masks(
    mask_dir: Union[os.PathLike, str], processed_dir: Union[os.PathLike, str]
) -> List[str]:
    """Normalize masks to single-channel uint8 format.

    Some PHH3-Cells masks are stored as RGBA PNGs instead of binary masks.
    This function converts all masks to a consistent single-channel uint8 format.

    Masks that already have a converted file in `processed_dir` are skipped,
    so the function can be called again to resume an interrupted run.

    Args:
        mask_dir: Folder containing the original PNG masks.
        processed_dir: Folder where the converted TIF masks are written. Created if missing.

    Returns:
        The filepaths of the converted masks, in natural sort order of the input names.
    """
    os.makedirs(processed_dir, exist_ok=True)

    mask_paths = natsorted(glob(os.path.join(mask_dir, "*.png")))
    processed_paths = []
    for mask_path in mask_paths:
        # Swap only the extension: `str.replace` would also rewrite a '.png' inside
        # the stem and would leave an upper-case '.PNG' untouched.
        stem = os.path.splitext(os.path.basename(mask_path))[0]
        out_path = os.path.join(processed_dir, stem + ".tif")
        processed_paths.append(out_path)

        if os.path.exists(out_path):
            continue

        mask = imageio.imread(mask_path)

        # Handle RGBA/RGB masks: convert to binary using the first channel.
        # Single-channel masks keep their values and are only cast to uint8 below.
        if mask.ndim == 3:
            mask = (mask[..., 0] > 0)

        mask = np.asarray(mask, dtype="uint8")
        imageio.imwrite(out_path, mask, compression="zlib")

    return processed_paths


def get_lsm_mouse_embryo_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the LSM Mouse Embryo dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the extracted data directory.
    """
    data_dir = os.path.join(path, "LSM_Segmentation_Dataset")
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "LSM_Segmentation_Dataset.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def get_lsm_mouse_embryo_paths(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"] = "train",
    task: Literal["tissue", "cells", "proliferating_cells"] = "tissue",
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the LSM Mouse Embryo data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. One of 'train', 'val' or 'test'.
        task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural
            ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images)
            or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    assert split in _SPLIT_MAPPING, f"'{split}' is not a valid split. Choose from {list(_SPLIT_MAPPING.keys())}."
    assert task in TASKS, f"'{task}' is not a valid task. Choose from {TASK_NAMES}."

    data_dir = get_lsm_mouse_embryo_data(path, download)
    split_name = _SPLIT_MAPPING[split]

    task_info = TASKS[task]
    image_dir = os.path.join(data_dir, task_info["dir"], split_name, "Original")
    mask_dir = os.path.join(data_dir, task_info["dir"], split_name, task_info["mask_dir"])

    image_paths = natsorted(glob(os.path.join(image_dir, "*.png")))
    assert len(image_paths) > 0, f"No images found in {image_dir}"

    # Preprocess masks to ensure consistent single-channel format.
    # The cached masks are reused only if there is one per image. Otherwise the
    # cache is missing or left incomplete by an interrupted run, and the
    # preprocessing (which skips files that already exist) fills in the rest.
    processed_dir = os.path.join(path, "processed_masks", task, split_name)
    seg_paths = natsorted(glob(os.path.join(processed_dir, "*.tif")))
    if len(seg_paths) != len(image_paths):
        seg_paths = _preprocess_masks(mask_dir, processed_dir)

    assert len(image_paths) == len(seg_paths), \
        f"Mismatch: {len(image_paths)} images vs {len(seg_paths)} masks for {task}/{split_name}"

    return image_paths, seg_paths


def get_lsm_mouse_embryo_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"] = "train",
    task: Literal["tissue", "cells", "proliferating_cells"] = "tissue",
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the LSM Mouse Embryo dataset for tissue and cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. One of 'train', 'val' or 'test'.
        task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural
            ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images)
            or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, seg_paths = get_lsm_mouse_embryo_paths(path, split, task, download)

    kwargs = util.ensure_transforms(ndim=2, **kwargs)

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=seg_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        ndim=2,
        **kwargs
    )


def get_lsm_mouse_embryo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"] = "train",
    task: Literal["tissue", "cells", "proliferating_cells"] = "tissue",
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the LSM Mouse Embryo dataloader for tissue and cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. One of 'train', 'val' or 'test'.
        task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural
            ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images)
            or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_lsm_mouse_embryo_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        task=task,
        download=download,
        **ds_kwargs,
    )
    return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
URL =
'https://www.dropbox.com/s/7zkk4j415ncfs47/LSM_Segmentation_Dataset.zip?dl=1'
CHECKSUM =
None
TASKS =
{'tissue': {'dir': 'DAPI-Tissue', 'mask_dir': 'Mask'}, 'cells': {'dir': 'DAPI-Cells', 'mask_dir': 'Mesen_Mask'}, 'proliferating_cells': {'dir': 'PHH3-Cells', 'mask_dir': 'Mask'}}
TASK_NAMES =
['tissue', 'cells', 'proliferating_cells']
SPLITS =
['Training', 'Validation', 'Test']
def
get_lsm_mouse_embryo_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_lsm_mouse_embryo_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the LSM Mouse Embryo data is available locally and return its location.

    If the extracted folder is not found under `path`, the archive is fetched
    via `util.download_source` and unpacked into `path` via `util.unzip`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the extracted data directory.
    """
    extracted_dir = os.path.join(path, "LSM_Segmentation_Dataset")

    # Only touch the network / file system when the extracted folder is missing.
    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "LSM_Segmentation_Dataset.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return extracted_dir
Download the LSM Mouse Embryo dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the extracted data directory.
def
get_lsm_mouse_embryo_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'] = 'train', task: Literal['tissue', 'cells', 'proliferating_cells'] = 'tissue', download: bool = False) -> Tuple[List[str], List[str]]:
def get_lsm_mouse_embryo_paths(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"] = "train",
    task: Literal["tissue", "cells", "proliferating_cells"] = "tissue",
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the LSM Mouse Embryo data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. One of 'train', 'val' or 'test'.
        task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural
            ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images)
            or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    assert split in _SPLIT_MAPPING, f"'{split}' is not a valid split. Choose from {list(_SPLIT_MAPPING.keys())}."
    assert task in TASKS, f"'{task}' is not a valid task. Choose from {TASK_NAMES}."

    data_dir = get_lsm_mouse_embryo_data(path, download)
    split_name = _SPLIT_MAPPING[split]

    task_info = TASKS[task]
    image_dir = os.path.join(data_dir, task_info["dir"], split_name, "Original")
    mask_dir = os.path.join(data_dir, task_info["dir"], split_name, task_info["mask_dir"])

    image_paths = natsorted(glob(os.path.join(image_dir, "*.png")))
    assert len(image_paths) > 0, f"No images found in {image_dir}"

    # Preprocess masks to ensure consistent single-channel format.
    # The cached masks are reused only if there is one per image. Otherwise the
    # cache is missing or left incomplete by an interrupted run, and the
    # preprocessing (which skips files that already exist) fills in the rest.
    processed_dir = os.path.join(path, "processed_masks", task, split_name)
    seg_paths = natsorted(glob(os.path.join(processed_dir, "*.tif")))
    if len(seg_paths) != len(image_paths):
        seg_paths = _preprocess_masks(mask_dir, processed_dir)

    assert len(image_paths) == len(seg_paths), \
        f"Mismatch: {len(image_paths)} images vs {len(seg_paths)} masks for {task}/{split_name}"

    return image_paths, seg_paths
Get paths to the LSM Mouse Embryo data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split to use. One of 'train', 'val' or 'test'.
- task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images) or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_lsm_mouse_embryo_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'] = 'train', task: Literal['tissue', 'cells', 'proliferating_cells'] = 'tissue', download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_lsm_mouse_embryo_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"] = "train",
    task: Literal["tissue", "cells", "proliferating_cells"] = "tissue",
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build a 2D segmentation dataset from the LSM Mouse Embryo images and masks.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. One of 'train', 'val' or 'test'.
        task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural
            ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images)
            or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # Resolve the image / mask file lists for the requested task and split.
    raw_files, label_files = get_lsm_mouse_embryo_paths(path, split, task, download)

    # Let the shared helper complete the transform-related keyword arguments for 2D data.
    kwargs = util.ensure_transforms(ndim=2, **kwargs)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_files,
        raw_key=None,
        label_paths=label_files,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        ndim=2,
        **kwargs
    )
    return dataset
Get the LSM Mouse Embryo dataset for tissue and cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- split: The data split to use. One of 'train', 'val' or 'test'.
- task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images) or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
def
get_lsm_mouse_embryo_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'] = 'train', task: Literal['tissue', 'cells', 'proliferating_cells'] = 'tissue', download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_lsm_mouse_embryo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"] = "train",
    task: Literal["tissue", "cells", "proliferating_cells"] = "tissue",
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build a PyTorch DataLoader over the LSM Mouse Embryo segmentation dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. One of 'train', 'val' or 'test'.
        task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural
            ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images)
            or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_lsm_mouse_embryo_dataset(
        path=path, patch_shape=patch_shape, split=split, task=task, download=download, **dataset_kwargs
    )

    loader = torch_em.get_data_loader(dataset=ds, batch_size=batch_size, **dataloader_kwargs)
    return loader
Get the LSM Mouse Embryo dataloader for tissue and cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The data split to use. One of 'train', 'val' or 'test'.
- task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images) or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.