torch_em.data.datasets.light_microscopy.blastospim

The BlastoSPIM dataset contains annotations for nucleus segmentation in selective plane illumination microscopy (SPIM) images of preimplantation mouse embryos.

This dataset is from the publication https://doi.org/10.1242/dev.202817. Please cite it if you use this dataset for your research.

  1"""The BlastoSPIM dataset contains annotations for nucleus segmentation in
  2selective plane illumination microscopy (SPIM) images of preimplantation mouse embryos.
  3
  4This dataset is from the publication https://doi.org/10.1242/dev.202817.
  5Please cite it if you use this dataset for your research.
  6"""
  7
  8import os
  9from glob import glob
 10from tqdm import tqdm
 11from pathlib import Path
 12from natsort import natsorted
 13from typing import Tuple, List, Union
 14
 15import gzip
 16import numpy as np
 17
 18from torch.utils.data import Dataset, DataLoader
 19
 20import torch_em
 21
 22from .. import util
 23
 24
 25URL = "https://plus.figshare.com/ndownloader/articles/26540593/versions/1"
 26CHECKSUM = "8be979c5a06cfad479a5cfe21b8bbb0e26f0e677cb052fe43275fa451fa9e9ac"
 27
 28
 29def _preprocess_inputs(data_dir):
 30    import h5py
 31
 32    raw_paths = natsorted(glob(os.path.join(data_dir, "*_image_*.npy.gz")))
 33    label_paths = natsorted(glob(os.path.join(data_dir, "*_masks_*.npy.gz")))
 34
 35    preprocessed_dir = os.path.join(data_dir, "preprocessed")
 36    os.makedirs(preprocessed_dir, exist_ok=True)
 37
 38    for rpath, lpath in tqdm(
 39        zip(raw_paths, label_paths), desc="Preprocessing inputs", total=len(raw_paths)
 40    ):
 41        with gzip.open(rpath, "rb") as fr:
 42            raw = np.load(fr)
 43
 44        with gzip.open(lpath, "rb") as fl:
 45            labels = np.load(fl)
 46
 47        vname = os.path.basename(rpath).split(".")[0]
 48        volume_path = os.path.join(preprocessed_dir, Path(vname).with_suffix(".h5"))
 49        with h5py.File(volume_path, "w") as f:
 50            f.create_dataset("raw", data=raw, compression="gzip")
 51            f.create_dataset("labels", data=labels, compression="gzip")
 52
 53
 54def get_blastospim_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 55    """Download the BlastoSPIM dataset.
 56
 57    Args:
 58        path: Filepath to a folder where the downloaded data will be saved.
 59        download: Whether to download the data if it is not present.
 60
 61    Returns:
 62        Filepath where the data is downloaded and preprocessed.
 63    """
 64    data_dir = os.path.join(path, "data", "preprocessed")
 65    if os.path.exists(data_dir):
 66        return data_dir
 67
 68    os.makedirs(path, exist_ok=True)
 69
 70    zip_path = os.path.join(path, "data.zip")
 71    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 72    util.unzip(zip_path=zip_path, dst=os.path.join(path, "data"))
 73
 74    # Preprocess inputs.
 75    _preprocess_inputs(os.path.join(path, "data"))
 76
 77    return data_dir
 78
 79
 80def get_blastospim_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
 81    """Get paths to the BlastoSPIM data.
 82
 83    Args:
 84        path: Filepath to a folder where the downloaded data will be saved.
 85        download: Whether to download the data if it is not present.
 86
 87    Returns:
 88        List of filepaths for the input data.
 89    """
 90    data_dir = get_blastospim_data(path, download)
 91    volume_paths = glob(os.path.join(data_dir, "*.h5"))
 92    return volume_paths
 93
 94
 95def get_blastospim_dataset(
 96    path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs
 97) -> Dataset:
 98    """Download the BlastoSPIM dataset for nucleus segmentation.
 99
100    Args:
101        path: Filepath to a folder where the downloaded data will be saved.
102        patch_shape: The patch shape to use for training.
103        download: Whether to download the data if it is not present.
104        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
105
106    Returns:
107        The segmentation dataset.
108    """
109    volume_paths = get_blastospim_paths(path, download)
110
111    return torch_em.default_segmentation_dataset(
112        raw_paths=volume_paths,
113        raw_key="raw",
114        label_paths=volume_paths,
115        label_key="labels",
116        patch_shape=patch_shape,
117        is_seg_dataset=True,
118        **kwargs
119    )
120
121
def get_blastospim_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> DataLoader:
    """Create the BlastoSPIM dataloader for nucleus segmentation.

    The keyword arguments are divided with `util.split_kwargs`: the first part is forwarded
    to `get_blastospim_dataset`, the second part to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    segmentation_dataset = get_blastospim_dataset(path, patch_shape, download, **dataset_kwargs)
    loader = torch_em.get_data_loader(segmentation_dataset, batch_size, **dataloader_kwargs)
    return loader
URL = 'https://plus.figshare.com/ndownloader/articles/26540593/versions/1'
CHECKSUM = '8be979c5a06cfad479a5cfe21b8bbb0e26f0e677cb052fe43275fa451fa9e9ac'
def get_blastospim_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_blastospim_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Fetch the BlastoSPIM data and convert it to HDF5 if that has not happened yet.

    If the folder `<path>/data/preprocessed` already exists it is returned right away.
    Otherwise the archive is downloaded to `<path>/data.zip`, unpacked to `<path>/data`
    and the unpacked files are preprocessed.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath to the folder with the preprocessed data.
    """
    extracted_dir = os.path.join(path, "data")
    preprocessed_dir = os.path.join(extracted_dir, "preprocessed")

    if not os.path.exists(preprocessed_dir):
        os.makedirs(path, exist_ok=True)

        archive_path = os.path.join(path, "data.zip")
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive_path, dst=extracted_dir)

        # Convert the unpacked files into the 'preprocessed' subfolder.
        _preprocess_inputs(extracted_dir)

    return preprocessed_dir

Download the BlastoSPIM dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded and preprocessed.

def get_blastospim_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
def get_blastospim_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
    """Get paths to the BlastoSPIM data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Naturally sorted list of filepaths to the preprocessed HDF5 volumes.
    """
    data_dir = get_blastospim_data(path, download)
    # 'glob' yields files in arbitrary, filesystem-dependent order;
    # sort them so that the order of the volumes is reproducible across runs and machines.
    volume_paths = natsorted(glob(os.path.join(data_dir, "*.h5")))
    return volume_paths

Get paths to the BlastoSPIM data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the input data.

def get_blastospim_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 96def get_blastospim_dataset(
 97    path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs
 98) -> Dataset:
 99    """Download the BlastoSPIM dataset for nucleus segmentation.
100
101    Args:
102        path: Filepath to a folder where the downloaded data will be saved.
103        patch_shape: The patch shape to use for training.
104        download: Whether to download the data if it is not present.
105        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
106
107    Returns:
108        The segmentation dataset.
109    """
110    volume_paths = get_blastospim_paths(path, download)
111
112    return torch_em.default_segmentation_dataset(
113        raw_paths=volume_paths,
114        raw_key="raw",
115        label_paths=volume_paths,
116        label_key="labels",
117        patch_shape=patch_shape,
118        is_seg_dataset=True,
119        **kwargs
120    )

Download the BlastoSPIM dataset for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_blastospim_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_blastospim_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> DataLoader:
    """Create the BlastoSPIM dataloader for nucleus segmentation.

    The keyword arguments are divided with `util.split_kwargs`: the first part is forwarded
    to `get_blastospim_dataset`, the second part to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    segmentation_dataset = get_blastospim_dataset(path, patch_shape, download, **dataset_kwargs)
    loader = torch_em.get_data_loader(segmentation_dataset, batch_size, **dataloader_kwargs)
    return loader

Download the BlastoSPIM dataloader for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.