torch_em.data.datasets.light_microscopy.parhyale_regen

The Parhyale Regen dataset contains nucleus annotations for Parhyale images acquired with a confocal microscope.

The dataset is located at https://zenodo.org/records/8252039. This dataset is from the publication https://doi.org/10.7554/eLife.19766.012. Please cite it if you use this dataset for your research.

  1"""The Parhyale Regen dataset contains nucleus annotations for parhyale images from confocal microscope.
  2
  3The dataset is located at https://zenodo.org/records/8252039.
  4This dataset is from the publication https://doi.org/10.7554/eLife.19766.012.
  5Please cite it if you use this dataset for your research.
  6"""
  7
  8import os
  9from glob import glob
 10from natsort import natsorted
 11from typing import Union, Tuple, List
 12
 13import imageio.v3 as imageio
 14
 15from torch.utils.data import Dataset, DataLoader
 16
 17import torch_em
 18
 19from .. import util
 20
 21
 22def _preprocess_data(root, path):
 23    import h5py
 24
 25    raw_path = os.path.join(path, "Parhyale_H2B-EGFP_images_tp01-50.tif")
 26    assert os.path.exists(raw_path)
 27
 28    raw = imageio.imread(raw_path)
 29
 30    # We have limited timepoints annotated, let's extract them first.
 31    tps = [0, 10, 20, 30, 40, 49]
 32    raw_tps = [raw[i, ...] for i in tps]
 33    label_tps = [imageio.imread(p) for p in natsorted(glob(os.path.join(path, "*_instance-segmentation-labels_*.tif")))]
 34
 35    # Get the new folder where we store the h5 files.
 36    new_path = os.path.join(root, "preprocessed")
 37    os.makedirs(new_path, exist_ok=True)
 38
 39    for curr_tp, curr_raw, curr_label in zip(tps, raw_tps, label_tps):
 40        # Store each 3d volume per timepoint in their individual h5 files.
 41        fpath = os.path.join(new_path, f"Parhyale_H2B-EGFP_{curr_tp + 1}.h5")
 42        with h5py.File(fpath, "w") as f:
 43            f.create_dataset("raw", data=curr_raw, compression="gzip")
 44            f.create_dataset("labels", data=curr_label, compression="gzip")
 45
 46
 47def get_parhyale_regen_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 48    """Download the Parhyale Regen dataset.
 49
 50    Args:
 51        path: Filepath to a folder where the downloaded data will be saved.
 52        download: Whether to download the data if it is not present.
 53
 54    Returns:
 55        Filepath where the training data is stored.
 56    """
 57    import requests
 58
 59    data_dir = os.path.join(path, "data")
 60    if os.path.exists(data_dir):
 61        return path
 62
 63    os.makedirs(data_dir, exist_ok=True)
 64
 65    # Download the data from Zenodo via fetching each file.
 66    # NOTE: This data download is implemented because all image and label files are scattered in the link.
 67    url = "https://zenodo.org/api/records/8252039"
 68
 69    for f in requests.get(url).json()["files"]:
 70        fpath = os.path.join(data_dir, f["key"])
 71        print("Downloading:", f["key"])
 72        r = requests.get(f["links"]["self"])
 73        with open(fpath, "wb") as out:
 74            out.write(r.content)
 75
 76    # Preprocess the images to keep the relevant inputs.
 77    _preprocess_data(path, data_dir)
 78
 79    return path
 80
 81
 82def get_parhyale_regen_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
 83    """Get paths for the Parhyale Regen data.
 84
 85    Args:
 86        path: Filepath to a folder where the downloaded data will be saved.
 87        download: Whether to download the data if it is not present.
 88
 89    Returns:
 90        List of filepaths for the volumetric data.
 91    """
 92    data_dir = get_parhyale_regen_data(path, download)
 93    vol_paths = natsorted(glob(os.path.join(data_dir, "preprocessed", "*.h5")))
 94    return vol_paths
 95
 96
 97def get_parhyale_regen_dataset(
 98    path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs
 99) -> Dataset:
100    """Get the Parhyale Regen dataset for nucleus segmentation.
101
102    Args:
103        path: Filepath to a folder where the downloaded data will be saved.
104        patch_shape: The patch shape to use for training.
105        download: Whether to download the data if it is not present.
106        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
107
108    Returns:
109        The segmentation dataset.
110    """
111    volume_paths = get_parhyale_regen_paths(path, download)
112
113    return torch_em.default_segmentation_dataset(
114        raw_paths=volume_paths,
115        raw_key="raw",
116        label_paths=volume_paths,
117        label_key="labels",
118        patch_shape=patch_shape,
119        **kwargs
120    )
121
122
def get_parhyale_regen_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> DataLoader:
    """Get the Parhyale Regen dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the remaining ones, which go to the loader.
    split = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds_kwargs, loader_kwargs = split

    ds = get_parhyale_regen_dataset(path, patch_shape, download, **ds_kwargs)
    loader = torch_em.get_data_loader(ds, batch_size, **loader_kwargs)
    return loader
def get_parhyale_regen_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_parhyale_regen_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the Parhyale Regen dataset.

    The files are stored in "<path>/data" and afterwards preprocessed via `_preprocess_data`.
    If "<path>/data" already exists, nothing is downloaded.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the training data is stored.

    Raises:
        RuntimeError: If the data is not present and `download` is False.
        requests.HTTPError: If one of the requests to Zenodo returns an error status.
    """
    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        return path

    # Respect the `download` flag instead of always fetching the data.
    if not download:
        raise RuntimeError(f"Cannot find the data at {data_dir}, but download was set to False.")

    import shutil

    import requests

    os.makedirs(data_dir, exist_ok=True)

    # Download the data from Zenodo via fetching each file.
    # NOTE: This data download is implemented because all image and label files are scattered in the link.
    url = "https://zenodo.org/api/records/8252039"

    try:
        record = requests.get(url)
        record.raise_for_status()

        for f in record.json()["files"]:
            # Only the file name is used, so that a key cannot point outside of `data_dir`.
            fpath = os.path.join(data_dir, os.path.basename(f["key"]))
            print("Downloading:", f["key"])
            # Stream to disk in chunks rather than holding the whole file in memory.
            with requests.get(f["links"]["self"], stream=True) as r:
                r.raise_for_status()
                with open(fpath, "wb") as out:
                    for chunk in r.iter_content(chunk_size=1024 * 1024):
                        out.write(chunk)
    except BaseException:
        # The existence of `data_dir` is what marks the data as present (see the check above),
        # so a failed or interrupted download must not leave it behind.
        shutil.rmtree(data_dir, ignore_errors=True)
        raise

    # Preprocess the images to keep the relevant inputs.
    _preprocess_data(path, data_dir)

    return path

Download the Parhyale Regen dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the training data is stored.

def get_parhyale_regen_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
def get_parhyale_regen_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
    """Collect the filepaths of the preprocessed Parhyale Regen volumes.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Naturally sorted list of the h5 files found in the "preprocessed" folder.
    """
    root = get_parhyale_regen_data(path, download)
    # The h5 volumes are written to the "preprocessed" subfolder of the returned folder.
    pattern = os.path.join(root, "preprocessed", "*.h5")
    return natsorted(glob(pattern))

Get paths for the Parhyale Regen data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the volumetric data.

def get_parhyale_regen_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 98def get_parhyale_regen_dataset(
 99    path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs
100) -> Dataset:
101    """Get the Parhyale Regen dataset for nucleus segmentation.
102
103    Args:
104        path: Filepath to a folder where the downloaded data will be saved.
105        patch_shape: The patch shape to use for training.
106        download: Whether to download the data if it is not present.
107        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
108
109    Returns:
110        The segmentation dataset.
111    """
112    volume_paths = get_parhyale_regen_paths(path, download)
113
114    return torch_em.default_segmentation_dataset(
115        raw_paths=volume_paths,
116        raw_key="raw",
117        label_paths=volume_paths,
118        label_key="labels",
119        patch_shape=patch_shape,
120        **kwargs
121    )

Get the Parhyale Regen dataset for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_parhyale_regen_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_parhyale_regen_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> DataLoader:
    """Get the Parhyale Regen dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the remaining ones, which go to the loader.
    split = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds_kwargs, loader_kwargs = split

    ds = get_parhyale_regen_dataset(path, patch_shape, download, **ds_kwargs)
    loader = torch_em.get_data_loader(ds, batch_size, **loader_kwargs)
    return loader

Get the Parhyale Regen dataloader for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.