torch_em.data.datasets.light_microscopy.blastospim
The BlastoSPIM dataset contains annotations for nucleus segmentation in selective plane illumination microscopy (SPIM) images of preimplantation mouse embryos.
This dataset is from the publication https://doi.org/10.1242/dev.202817. Please cite it if you use this dataset for your research.
"""The BlastoSPIM dataset contains annotations for nucleus segmentation in
selective plane illumination microscopy (SPIM) images of preimplantation mouse embryos.

This dataset is from the publication https://doi.org/10.1242/dev.202817.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from tqdm import tqdm
from pathlib import Path
from natsort import natsorted
from typing import Tuple, List, Union

import gzip
import numpy as np

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://plus.figshare.com/ndownloader/articles/26540593/versions/1"
CHECKSUM = "8be979c5a06cfad479a5cfe21b8bbb0e26f0e677cb052fe43275fa451fa9e9ac"


def _preprocess_inputs(data_dir):
    """Convert the gzipped numpy volumes in `data_dir` to HDF5 files.

    Every `*_image_*.npy.gz` file is paired with the `*_masks_*.npy.gz` file at the same
    position in the naturally sorted file lists. Each pair is written to
    `<data_dir>/preprocessed/<name>.h5`, where `<name>` is the image filename up to its
    first dot. The image is stored under the key "raw" and the masks under the key "labels",
    both gzip-compressed.

    Args:
        data_dir: Folder that contains the extracted `.npy.gz` files.

    Raises:
        ValueError: If the number of image files differs from the number of mask files.
    """
    import h5py

    raw_paths = natsorted(glob(os.path.join(data_dir, "*_image_*.npy.gz")))
    label_paths = natsorted(glob(os.path.join(data_dir, "*_masks_*.npy.gz")))

    # The pairing below is purely positional, so a single missing file would silently
    # truncate or shift all following image / mask pairs. Fail before creating the
    # output folder, so that an inconsistent extraction is never mistaken for a finished cache.
    if len(raw_paths) != len(label_paths):
        raise ValueError(
            f"Found {len(raw_paths)} image files but {len(label_paths)} mask files in '{data_dir}'."
        )

    preprocessed_dir = os.path.join(data_dir, "preprocessed")
    os.makedirs(preprocessed_dir, exist_ok=True)

    for rpath, lpath in tqdm(
        zip(raw_paths, label_paths), desc="Preprocessing inputs", total=len(raw_paths)
    ):
        with gzip.open(rpath, "rb") as fr:
            raw = np.load(fr)

        with gzip.open(lpath, "rb") as fl:
            labels = np.load(fl)

        # Drop everything after the first dot (".npy.gz") to get the volume name.
        vname = os.path.basename(rpath).split(".")[0]
        volume_path = os.path.join(preprocessed_dir, Path(vname).with_suffix(".h5"))
        with h5py.File(volume_path, "w") as f:
            f.create_dataset("raw", data=raw, compression="gzip")
            f.create_dataset("labels", data=labels, compression="gzip")


def get_blastospim_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the BlastoSPIM dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded and preprocessed.
    """
    data_dir = os.path.join(path, "data", "preprocessed")
    # An existing preprocessed folder is treated as a finished cache.
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "data.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=os.path.join(path, "data"))

    # Preprocess inputs.
    _preprocess_inputs(os.path.join(path, "data"))

    return data_dir


def get_blastospim_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
    """Get paths to the BlastoSPIM data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the input data, in natural sort order.
    """
    data_dir = get_blastospim_data(path, download)
    # glob returns files in arbitrary order; sort for a reproducible volume order.
    volume_paths = natsorted(glob(os.path.join(data_dir, "*.h5")))
    return volume_paths


def get_blastospim_dataset(
    path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> Dataset:
    """Download the BlastoSPIM dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    volume_paths = get_blastospim_paths(path, download)

    # Raw data and labels live in the same HDF5 files, under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=volume_paths,
        raw_key="raw",
        label_paths=volume_paths,
        label_key="labels",
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )


def get_blastospim_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> DataLoader:
    """Get the BlastoSPIM dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_blastospim_dataset(path, patch_shape, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://plus.figshare.com/ndownloader/articles/26540593/versions/1'
CHECKSUM =
'8be979c5a06cfad479a5cfe21b8bbb0e26f0e677cb052fe43275fa451fa9e9ac'
def
get_blastospim_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_blastospim_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the preprocessed BlastoSPIM data is available and return its location.

    If `<path>/data/preprocessed` already exists it is returned as-is. Otherwise the archive is
    fetched to `<path>/data.zip` via `util.download_source`, unpacked to `<path>/data` and
    converted by `_preprocess_inputs`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded and preprocessed.
    """
    extracted_root = os.path.join(path, "data")
    preprocessed_root = os.path.join(extracted_root, "preprocessed")

    if not os.path.exists(preprocessed_root):
        os.makedirs(path, exist_ok=True)

        archive_path = os.path.join(path, "data.zip")
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive_path, dst=extracted_root)

        # Convert the extracted volumes to the preprocessed format.
        _preprocess_inputs(extracted_root)

    return preprocessed_root
Download the BlastoSPIM dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded and preprocessed.
def
get_blastospim_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
def get_blastospim_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
    """Get paths to the BlastoSPIM data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the input data, in natural sort order.
    """
    data_dir = get_blastospim_data(path, download)
    # glob returns files in arbitrary order; sort for a reproducible volume order.
    volume_paths = natsorted(glob(os.path.join(data_dir, "*.h5")))
    return volume_paths
Get paths to the BlastoSPIM data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the input data.
def
get_blastospim_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_blastospim_dataset(
    path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> Dataset:
    """Build the BlastoSPIM dataset for nucleus segmentation.

    The volume files returned by `get_blastospim_paths` are used for both the raw data
    (key "raw") and the labels (key "labels").

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    h5_files = get_blastospim_paths(path, download)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=h5_files, raw_key="raw",
        label_paths=h5_files, label_key="labels",
        patch_shape=patch_shape, is_seg_dataset=True,
        **kwargs
    )
    return dataset
Download the BlastoSPIM dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_blastospim_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_blastospim_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> DataLoader:
    """Get the BlastoSPIM dataloader for nucleus segmentation.

    The extra keyword arguments are split with `util.split_kwargs`: one part goes to
    `get_blastospim_dataset`, the other to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    blastospim_ds = get_blastospim_dataset(
        path=path, patch_shape=patch_shape, download=download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(blastospim_ds, batch_size, **dataloader_kwargs)
    return loader
Download the BlastoSPIM dataloader for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.