torch_em.data.datasets.light_microscopy.lsm_mouse_embryo

The LSM Mouse Embryo dataset contains annotations for tissue and cell segmentation in light-sheet microscopy images of mouse embryos.

NOTE: The dataset provides only semantic segmentation annotations.

The dataset is from the publication https://doi.org/10.1109/ACCESS.2022.3210542. Please cite it if you use this dataset in your research.

  1"""The LSM Mouse Embryo dataset contains annotations for tissue and cell segmentation
  2in light-sheet microscopy images of mouse embryos.
  3
  4NOTE: The dataset only has semantic segmentation.
  5
  6The dataset is from the publication https://doi.org/10.1109/ACCESS.2022.3210542.
  7Please cite it if you use this dataset in your research.
  8"""
  9
 10import os
 11from glob import glob
 12from natsort import natsorted
 13from typing import Union, Literal, Tuple, List
 14
 15import numpy as np
 16import imageio.v3 as imageio
 17
 18from torch.utils.data import Dataset, DataLoader
 19
 20import torch_em
 21
 22from .. import util
 23
 24
 25URL = "https://www.dropbox.com/s/7zkk4j415ncfs47/LSM_Segmentation_Dataset.zip?dl=1"
 26CHECKSUM = None
 27
 28TASKS = {
 29    "tissue": {"dir": "DAPI-Tissue", "mask_dir": "Mask"},
 30    "cells": {"dir": "DAPI-Cells", "mask_dir": "Mesen_Mask"},
 31    "proliferating_cells": {"dir": "PHH3-Cells", "mask_dir": "Mask"},
 32}
 33
 34TASK_NAMES = list(TASKS.keys())
 35SPLITS = ["Training", "Validation", "Test"]
 36_SPLIT_MAPPING = {"train": "Training", "val": "Validation", "test": "Test"}
 37
 38
 39def _preprocess_masks(mask_dir, processed_dir):
 40    """Normalize masks to single-channel uint8 format.
 41
 42    Some PHH3-Cells masks are stored as RGBA PNGs instead of binary masks.
 43    This function converts all masks to a consistent single-channel uint8 format.
 44    """
 45    os.makedirs(processed_dir, exist_ok=True)
 46
 47    mask_paths = natsorted(glob(os.path.join(mask_dir, "*.png")))
 48    processed_paths = []
 49    for mask_path in mask_paths:
 50        fname = os.path.basename(mask_path)
 51        out_path = os.path.join(processed_dir, fname.replace(".png", ".tif"))
 52        processed_paths.append(out_path)
 53
 54        if os.path.exists(out_path):
 55            continue
 56
 57        mask = imageio.imread(mask_path)
 58
 59        # Handle RGBA/RGB masks: convert to binary using the first channel.
 60        if mask.ndim == 3:
 61            mask = (mask[..., 0] > 0)
 62
 63        mask = np.asarray(mask, dtype="uint8")
 64        imageio.imwrite(out_path, mask, compression="zlib")
 65
 66    return processed_paths
 67
 68
 69def get_lsm_mouse_embryo_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 70    """Download the LSM Mouse Embryo dataset.
 71
 72    Args:
 73        path: Filepath to a folder where the downloaded data will be saved.
 74        download: Whether to download the data if it is not present.
 75
 76    Returns:
 77        The filepath to the extracted data directory.
 78    """
 79    data_dir = os.path.join(path, "LSM_Segmentation_Dataset")
 80    if os.path.exists(data_dir):
 81        return data_dir
 82
 83    os.makedirs(path, exist_ok=True)
 84    zip_path = os.path.join(path, "LSM_Segmentation_Dataset.zip")
 85    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 86    util.unzip(zip_path=zip_path, dst=path)
 87
 88    return data_dir
 89
 90
 91def get_lsm_mouse_embryo_paths(
 92    path: Union[os.PathLike, str],
 93    split: Literal["train", "val", "test"] = "train",
 94    task: Literal["tissue", "cells", "proliferating_cells"] = "tissue",
 95    download: bool = False,
 96) -> Tuple[List[str], List[str]]:
 97    """Get paths to the LSM Mouse Embryo data.
 98
 99    Args:
100        path: Filepath to a folder where the downloaded data will be saved.
101        split: The data split to use. One of 'train', 'val' or 'test'.
102        task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural
103            ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images)
104            or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
105        download: Whether to download the data if it is not present.
106
107    Returns:
108        List of filepaths for the image data.
109        List of filepaths for the label data.
110    """
111    assert split in _SPLIT_MAPPING, f"'{split}' is not a valid split. Choose from {list(_SPLIT_MAPPING.keys())}."
112    assert task in TASKS, f"'{task}' is not a valid task. Choose from {TASK_NAMES}."
113
114    data_dir = get_lsm_mouse_embryo_data(path, download)
115    split_name = _SPLIT_MAPPING[split]
116
117    task_info = TASKS[task]
118    image_dir = os.path.join(data_dir, task_info["dir"], split_name, "Original")
119    mask_dir = os.path.join(data_dir, task_info["dir"], split_name, task_info["mask_dir"])
120
121    image_paths = natsorted(glob(os.path.join(image_dir, "*.png")))
122    assert len(image_paths) > 0, f"No images found in {image_dir}"
123
124    # Preprocess masks to ensure consistent single-channel format.
125    processed_dir = os.path.join(path, "processed_masks", task, split_name)
126    if not os.path.exists(processed_dir) or len(glob(os.path.join(processed_dir, "*.tif"))) == 0:
127        seg_paths = _preprocess_masks(mask_dir, processed_dir)
128    else:
129        seg_paths = natsorted(glob(os.path.join(processed_dir, "*.tif")))
130
131    assert len(image_paths) == len(seg_paths), \
132        f"Mismatch: {len(image_paths)} images vs {len(seg_paths)} masks for {task}/{split_name}"
133
134    return image_paths, seg_paths
135
136
def get_lsm_mouse_embryo_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"] = "train",
    task: Literal["tissue", "cells", "proliferating_cells"] = "tissue",
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the LSM Mouse Embryo dataset for tissue and cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. One of 'train', 'val' or 'test'.
        task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural
            ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images)
            or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_lsm_mouse_embryo_paths(path, split, task, download)

    # The data is two-dimensional; pass the keyword arguments through `util.ensure_transforms` for ndim=2.
    dataset_kwargs = util.ensure_transforms(ndim=2, **kwargs)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        ndim=2,
        **dataset_kwargs
    )
    return dataset
174
175
def get_lsm_mouse_embryo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"] = "train",
    task: Literal["tissue", "cells", "proliferating_cells"] = "tissue",
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the LSM Mouse Embryo dataloader for tissue and cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. One of 'train', 'val' or 'test'.
        task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural
            ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images)
            or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_lsm_mouse_embryo_dataset(
        path=path, patch_shape=patch_shape, split=split, task=task, download=download, **dataset_kwargs
    )

    loader = torch_em.get_data_loader(dataset=ds, batch_size=batch_size, **dataloader_kwargs)
    return loader
# Download location of the zipped dataset.
URL = 'https://www.dropbox.com/s/7zkk4j415ncfs47/LSM_Segmentation_Dataset.zip?dl=1'
# No checksum is recorded for the archive.
CHECKSUM = None
# Per task: the dataset sub-folder ('dir') and the folder holding its masks ('mask_dir').
TASKS = {'tissue': {'dir': 'DAPI-Tissue', 'mask_dir': 'Mask'}, 'cells': {'dir': 'DAPI-Cells', 'mask_dir': 'Mesen_Mask'}, 'proliferating_cells': {'dir': 'PHH3-Cells', 'mask_dir': 'Mask'}}
# The task names accepted by the functions of this module.
TASK_NAMES = ['tissue', 'cells', 'proliferating_cells']
# Names of the split folders inside the dataset.
SPLITS = ['Training', 'Validation', 'Test']
def get_lsm_mouse_embryo_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_lsm_mouse_embryo_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the LSM Mouse Embryo dataset is available and return its location.

    If the extracted folder is already present nothing is downloaded. Otherwise the
    zip archive is fetched into `path` and unpacked there.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the extracted data directory.
    """
    archive_name = "LSM_Segmentation_Dataset"
    extracted_dir = os.path.join(path, archive_name)

    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)
        archive_path = os.path.join(path, f"{archive_name}.zip")
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive_path, dst=path)

    return extracted_dir

Download the LSM Mouse Embryo dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the extracted data directory.

def get_lsm_mouse_embryo_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'] = 'train', task: Literal['tissue', 'cells', 'proliferating_cells'] = 'tissue', download: bool = False) -> Tuple[List[str], List[str]]:
 92def get_lsm_mouse_embryo_paths(
 93    path: Union[os.PathLike, str],
 94    split: Literal["train", "val", "test"] = "train",
 95    task: Literal["tissue", "cells", "proliferating_cells"] = "tissue",
 96    download: bool = False,
 97) -> Tuple[List[str], List[str]]:
 98    """Get paths to the LSM Mouse Embryo data.
 99
100    Args:
101        path: Filepath to a folder where the downloaded data will be saved.
102        split: The data split to use. One of 'train', 'val' or 'test'.
103        task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural
104            ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images)
105            or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
106        download: Whether to download the data if it is not present.
107
108    Returns:
109        List of filepaths for the image data.
110        List of filepaths for the label data.
111    """
112    assert split in _SPLIT_MAPPING, f"'{split}' is not a valid split. Choose from {list(_SPLIT_MAPPING.keys())}."
113    assert task in TASKS, f"'{task}' is not a valid task. Choose from {TASK_NAMES}."
114
115    data_dir = get_lsm_mouse_embryo_data(path, download)
116    split_name = _SPLIT_MAPPING[split]
117
118    task_info = TASKS[task]
119    image_dir = os.path.join(data_dir, task_info["dir"], split_name, "Original")
120    mask_dir = os.path.join(data_dir, task_info["dir"], split_name, task_info["mask_dir"])
121
122    image_paths = natsorted(glob(os.path.join(image_dir, "*.png")))
123    assert len(image_paths) > 0, f"No images found in {image_dir}"
124
125    # Preprocess masks to ensure consistent single-channel format.
126    processed_dir = os.path.join(path, "processed_masks", task, split_name)
127    if not os.path.exists(processed_dir) or len(glob(os.path.join(processed_dir, "*.tif"))) == 0:
128        seg_paths = _preprocess_masks(mask_dir, processed_dir)
129    else:
130        seg_paths = natsorted(glob(os.path.join(processed_dir, "*.tif")))
131
132    assert len(image_paths) == len(seg_paths), \
133        f"Mismatch: {len(image_paths)} images vs {len(seg_paths)} masks for {task}/{split_name}"
134
135    return image_paths, seg_paths

Get paths to the LSM Mouse Embryo data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The data split to use. One of 'train', 'val' or 'test'.
  • task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images) or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
  • download: Whether to download the data if it is not present.
Returns:

A list of filepaths for the image data, and a list of filepaths for the label data.

def get_lsm_mouse_embryo_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'] = 'train', task: Literal['tissue', 'cells', 'proliferating_cells'] = 'tissue', download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_lsm_mouse_embryo_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"] = "train",
    task: Literal["tissue", "cells", "proliferating_cells"] = "tissue",
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the LSM Mouse Embryo dataset for tissue and cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. One of 'train', 'val' or 'test'.
        task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural
            ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images)
            or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_lsm_mouse_embryo_paths(path, split, task, download)

    # The data is two-dimensional; pass the keyword arguments through `util.ensure_transforms` for ndim=2.
    dataset_kwargs = util.ensure_transforms(ndim=2, **kwargs)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        ndim=2,
        **dataset_kwargs
    )
    return dataset

Get the LSM Mouse Embryo dataset for tissue and cell segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. One of 'train', 'val' or 'test'.
  • task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images) or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_lsm_mouse_embryo_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'] = 'train', task: Literal['tissue', 'cells', 'proliferating_cells'] = 'tissue', download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_lsm_mouse_embryo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"] = "train",
    task: Literal["tissue", "cells", "proliferating_cells"] = "tissue",
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the LSM Mouse Embryo dataloader for tissue and cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. One of 'train', 'val' or 'test'.
        task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural
            ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images)
            or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_lsm_mouse_embryo_dataset(
        path=path, patch_shape=patch_shape, split=split, task=task, download=download, **dataset_kwargs
    )

    loader = torch_em.get_data_loader(dataset=ds, batch_size=batch_size, **dataloader_kwargs)
    return loader

Get the LSM Mouse Embryo dataloader for tissue and cell segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. One of 'train', 'val' or 'test'.
  • task: The segmentation task. One of 'tissue' (3-class semantic segmentation of neural ectoderm and mesenchyme), 'cells' (binary cell segmentation in DAPI-stained images) or 'proliferating_cells' (binary segmentation of pHH3-stained proliferating cells).
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.