torch_em.data.datasets.light_microscopy.parhyale_regen
The Parhyale Regen dataset contains nucleus annotations for Parhyale images acquired with a confocal microscope.
The dataset is located at https://zenodo.org/records/8252039. This dataset is from the publication https://doi.org/10.7554/eLife.19766.012. Please cite it if you use this dataset for your research.
"""The Parhyale Regen dataset contains nucleus annotations for Parhyale images from a confocal microscope.

The dataset is located at https://zenodo.org/records/8252039.
This dataset is from the publication https://doi.org/10.7554/eLife.19766.012.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, List

import imageio.v3 as imageio

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


def _preprocess_data(root, path):
    """Extract the annotated timepoints and store each of them in its own h5 file.

    Args:
        root: Folder in which the "preprocessed" output folder is created.
        path: Folder that contains the downloaded tif files.
    """
    import h5py

    raw_path = os.path.join(path, "Parhyale_H2B-EGFP_images_tp01-50.tif")
    assert os.path.exists(raw_path), f"Cannot find the raw image stack at {raw_path}."

    raw = imageio.imread(raw_path)

    # We have limited timepoints annotated, let's extract them first.
    tps = [0, 10, 20, 30, 40, 49]
    raw_tps = [raw[i, ...] for i in tps]
    label_paths = natsorted(glob(os.path.join(path, "*_instance-segmentation-labels_*.tif")))
    label_tps = [imageio.imread(p) for p in label_paths]

    # Get the new folder where we store the h5 files.
    new_path = os.path.join(root, "preprocessed")
    os.makedirs(new_path, exist_ok=True)

    # NOTE: zip stops at the shortest input, so a missing label file silently drops the last timepoint(s).
    for curr_tp, curr_raw, curr_label in zip(tps, raw_tps, label_tps):
        # Store each 3d volume per timepoint in their individual h5 files.
        fpath = os.path.join(new_path, f"Parhyale_H2B-EGFP_{curr_tp + 1}.h5")
        with h5py.File(fpath, "w") as f:
            f.create_dataset("raw", data=curr_raw, compression="gzip")
            f.create_dataset("labels", data=curr_label, compression="gzip")


def get_parhyale_regen_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download and preprocess the Parhyale Regen dataset.

    The files of the Zenodo record are fetched into `<path>/data` and the annotated
    timepoints are then converted to h5 files in `<path>/preprocessed`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the training data is stored.

    Raises:
        RuntimeError: If the preprocessed data is not present and `download` is False.
        requests.HTTPError: If the Zenodo record or one of its files cannot be fetched.
    """
    data_dir = os.path.join(path, "data")
    preprocessed_dir = os.path.join(path, "preprocessed")

    # The h5 files are the final product. Checking for them (rather than for the raw
    # download folder) lets an interrupted download or preprocessing run be resumed.
    if glob(os.path.join(preprocessed_dir, "*.h5")):
        return path

    if not download:
        raise RuntimeError(f"Cannot find the data at {path}, but download was set to False.")

    # Imported lazily so that already prepared data can be used without 'requests'.
    import requests

    os.makedirs(data_dir, exist_ok=True)

    # Download the data from Zenodo via fetching each file.
    # NOTE: This data download is implemented because all image and label files are scattered in the link.
    url = "https://zenodo.org/api/records/8252039"
    record = requests.get(url)
    record.raise_for_status()

    for f in record.json()["files"]:
        fpath = os.path.join(data_dir, f["key"])
        if os.path.exists(fpath):  # Already fetched completely by a previous run.
            continue

        print("Downloading:", f["key"])
        # Stream into a temporary file and rename at the end, so that a file under its
        # final name is never a partial download or an HTTP error page.
        tmp_path = fpath + ".part"
        with requests.get(f["links"]["self"], stream=True) as r:
            r.raise_for_status()
            with open(tmp_path, "wb") as out:
                for chunk in r.iter_content(chunk_size=1 << 20):
                    out.write(chunk)
        os.replace(tmp_path, fpath)

    # Preprocess the images to keep the relevant inputs.
    _preprocess_data(path, data_dir)

    return path


def get_parhyale_regen_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
    """Get paths for the Parhyale Regen data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the volumetric data.
    """
    data_dir = get_parhyale_regen_data(path, download)
    vol_paths = natsorted(glob(os.path.join(data_dir, "preprocessed", "*.h5")))
    return vol_paths


def get_parhyale_regen_dataset(
    path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> Dataset:
    """Get the Parhyale Regen dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    volume_paths = get_parhyale_regen_paths(path, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=volume_paths,
        raw_key="raw",
        label_paths=volume_paths,
        label_key="labels",
        patch_shape=patch_shape,
        **kwargs
    )


def get_parhyale_regen_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> DataLoader:
    """Get the Parhyale Regen dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_parhyale_regen_dataset(path, patch_shape, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def
get_parhyale_regen_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_parhyale_regen_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download and preprocess the Parhyale Regen dataset.

    The files of the Zenodo record are fetched into `<path>/data` and the annotated
    timepoints are then converted to h5 files in `<path>/preprocessed`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the training data is stored.

    Raises:
        RuntimeError: If the preprocessed data is not present and `download` is False.
        requests.HTTPError: If the Zenodo record or one of its files cannot be fetched.
    """
    data_dir = os.path.join(path, "data")
    preprocessed_dir = os.path.join(path, "preprocessed")

    # The h5 files are the final product. Checking for them (rather than for the raw
    # download folder) lets an interrupted download or preprocessing run be resumed.
    if glob(os.path.join(preprocessed_dir, "*.h5")):
        return path

    if not download:
        raise RuntimeError(f"Cannot find the data at {path}, but download was set to False.")

    # Imported lazily so that already prepared data can be used without 'requests'.
    import requests

    os.makedirs(data_dir, exist_ok=True)

    # Download the data from Zenodo via fetching each file.
    # NOTE: This data download is implemented because all image and label files are scattered in the link.
    url = "https://zenodo.org/api/records/8252039"
    record = requests.get(url)
    record.raise_for_status()

    for f in record.json()["files"]:
        fpath = os.path.join(data_dir, f["key"])
        if os.path.exists(fpath):  # Already fetched completely by a previous run.
            continue

        print("Downloading:", f["key"])
        # Stream into a temporary file and rename at the end, so that a file under its
        # final name is never a partial download or an HTTP error page.
        tmp_path = fpath + ".part"
        with requests.get(f["links"]["self"], stream=True) as r:
            r.raise_for_status()
            with open(tmp_path, "wb") as out:
                for chunk in r.iter_content(chunk_size=1 << 20):
                    out.write(chunk)
        os.replace(tmp_path, fpath)

    # Preprocess the images to keep the relevant inputs.
    _preprocess_data(path, data_dir)

    return path
Download the Parhyale Regen dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the training data is stored.
def
get_parhyale_regen_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
def get_parhyale_regen_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
    """Collect the preprocessed h5 volumes of the Parhyale Regen data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The h5 files found in the "preprocessed" subfolder, ordered with `natsorted`.
    """
    # The data getter hands back the root folder, the volumes live one level below it.
    root = get_parhyale_regen_data(path, download)
    h5_pattern = os.path.join(root, "preprocessed", "*.h5")
    return natsorted(glob(h5_pattern))
Get paths for the Parhyale Regen data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the volumetric data.
def
get_parhyale_regen_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_parhyale_regen_dataset(
    path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> Dataset:
    """Build the nucleus segmentation dataset for the Parhyale Regen data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    h5_files = get_parhyale_regen_paths(path, download)

    # Each h5 file holds both the image ("raw") and its annotation ("labels"),
    # hence the same file list is used for the raw and the label paths.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=h5_files,
        raw_key="raw",
        label_paths=h5_files,
        label_key="labels",
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
Get the Parhyale Regen dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_parhyale_regen_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_parhyale_regen_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> DataLoader:
    """Build the dataloader for nucleus segmentation on the Parhyale Regen data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments understood by the dataset factory from the remaining loader arguments.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_parhyale_regen_dataset(path, patch_shape, download, **dataset_kwargs)
    loader = torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)
    return loader
Get the Parhyale Regen dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.