torch_em.data.datasets.light_microscopy.pnas_arabidopsis
The PNAS Arabidopsis dataset contains cell segmentation annotations for confocal microscopy images of Arabidopsis plantlets.
NOTE: There is tracking information available for this data.
This dataset is from the publication https://doi.org/10.1073/pnas.1616768113. Please cite it if you use this dataset for your research.
"""The PNAS Arabidopsis dataset contains cell segmentation in confocal microscopy images of
Arabidopsis plantlets.

NOTE: There is tracking information available for this data.

This dataset is from the publication https://doi.org/10.1073/pnas.1616768113.
Please cite it if you use this dataset for your research.
"""

import os
import shutil
from glob import glob
from tqdm import tqdm
from pathlib import Path
from natsort import natsorted
from typing import Union, Tuple, List

import imageio.v3 as imageio

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://www.repository.cam.ac.uk/bitstream/handle/1810/262530/PNAS.zip?sequence=4&isAllowed=y"
CHECKSUM = "39341398389baf6d93c3f652b7e2e8aedc5579c29dfaf2b82b41ebfc3caa05c4"


def get_pnas_arabidopsis_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the PNAS Arabidopsis dataset and convert it to one h5 file per volume.

    Every raw image matching `*trim-acylYFP.tif` is paired with its segmentation tiff and both
    are stored in `<path>/data/<image name>.h5` under the keys "raw" and "labels".
    Images without exactly one matching segmentation file are skipped with a printed message.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded and pre-processed.
    """
    data_dir = os.path.join(path, "data")
    extracted_dir = os.path.join(data_dir, "PNAS")

    # The data counts as ready only if converted volumes exist and the extracted archive was
    # cleaned up. The folder by itself is not a reliable marker: it is created before the download,
    # so a failed download or an interrupted conversion would leave an empty / partial folder
    # that every later call would silently accept.
    if glob(os.path.join(data_dir, "*.h5")) and not os.path.exists(extracted_dir):
        return data_dir

    os.makedirs(data_dir, exist_ok=True)

    zip_path = os.path.join(path, "PNAS.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=data_dir)

    # Convert the data to h5 (it's hard to keep track of the filenames as they are not completely consistent).
    # h5py is only needed for this one-time conversion, hence the local import.
    import h5py

    raw_pattern = os.path.join(extracted_dir, "plant*", "processed_tiffs", "*trim-acylYFP.tif")
    for rpath in tqdm(natsorted(glob(raw_pattern)), desc="Preprocessing images"):
        # The label file lives in the sibling folder and may carry an extra suffix before '.tif'.
        label_pattern = rpath.replace("processed_tiffs", "segmentation_tiffs").replace(".tif", "*.tif")
        label_candidates = glob(label_pattern)

        if len(label_candidates) != 1:
            print(f"It seems like there are no matching labels for '{os.path.basename(rpath)}'.")
            continue

        raw = imageio.imread(rpath)
        labels = imageio.imread(label_candidates[0])

        # Store both image and corresponding labels in a h5 file.
        vol_path = os.path.join(data_dir, Path(os.path.basename(rpath)).with_suffix(".h5"))
        with h5py.File(vol_path, "w") as f:
            f.create_dataset("raw", data=raw, dtype=raw.dtype, compression="gzip")
            f.create_dataset("labels", data=labels, dtype=labels.dtype, compression="gzip")

    # Remove the extracted archive; its absence marks the conversion as complete.
    shutil.rmtree(extracted_dir)

    return data_dir


def get_pnas_arabidopsis_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
    """Get paths to the PNAS Arabidopsis data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the volumetric data, in natural sort order.
    """
    data_dir = get_pnas_arabidopsis_data(path, download)
    # glob yields files in arbitrary order; sort them so the volume order is reproducible.
    return natsorted(glob(os.path.join(data_dir, "*.h5")))


def get_pnas_arabidopsis_dataset(
    path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> Dataset:
    """Get the PNAS Arabidopsis dataset for cell segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    volume_paths = get_pnas_arabidopsis_paths(path, download)

    # Raw data and labels are stored in the same h5 files, under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=volume_paths,
        raw_key="raw",
        label_paths=volume_paths,
        label_key="labels",
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )


def get_pnas_arabidopsis_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> DataLoader:
    """Get the PNAS Arabidopsis dataloader for cell segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for `torch_em.get_data_loader`; they are separated with `util.split_kwargs`.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_pnas_arabidopsis_dataset(path, patch_shape, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://www.repository.cam.ac.uk/bitstream/handle/1810/262530/PNAS.zip?sequence=4&isAllowed=y'
CHECKSUM =
'39341398389baf6d93c3f652b7e2e8aedc5579c29dfaf2b82b41ebfc3caa05c4'
def
get_pnas_arabidopsis_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_pnas_arabidopsis_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the PNAS Arabidopsis dataset and convert it to one h5 file per volume.

    Every raw image matching `*trim-acylYFP.tif` is paired with its segmentation tiff and both
    are stored in `<path>/data/<image name>.h5` under the keys "raw" and "labels".
    Images without exactly one matching segmentation file are skipped with a printed message.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded and pre-processed.
    """
    data_dir = os.path.join(path, "data")
    extracted_dir = os.path.join(data_dir, "PNAS")

    # The data counts as ready only if converted volumes exist and the extracted archive was
    # cleaned up. The folder by itself is not a reliable marker: it is created before the download,
    # so a failed download or an interrupted conversion would leave an empty / partial folder
    # that every later call would silently accept.
    if glob(os.path.join(data_dir, "*.h5")) and not os.path.exists(extracted_dir):
        return data_dir

    os.makedirs(data_dir, exist_ok=True)

    zip_path = os.path.join(path, "PNAS.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=data_dir)

    # Convert the data to h5 (it's hard to keep track of the filenames as they are not completely consistent).
    # h5py is only needed for this one-time conversion, hence the local import.
    import h5py

    raw_pattern = os.path.join(extracted_dir, "plant*", "processed_tiffs", "*trim-acylYFP.tif")
    for rpath in tqdm(natsorted(glob(raw_pattern)), desc="Preprocessing images"):
        # The label file lives in the sibling folder and may carry an extra suffix before '.tif'.
        label_pattern = rpath.replace("processed_tiffs", "segmentation_tiffs").replace(".tif", "*.tif")
        label_candidates = glob(label_pattern)

        if len(label_candidates) != 1:
            print(f"It seems like there are no matching labels for '{os.path.basename(rpath)}'.")
            continue

        raw = imageio.imread(rpath)
        labels = imageio.imread(label_candidates[0])

        # Store both image and corresponding labels in a h5 file.
        vol_path = os.path.join(data_dir, Path(os.path.basename(rpath)).with_suffix(".h5"))
        with h5py.File(vol_path, "w") as f:
            f.create_dataset("raw", data=raw, dtype=raw.dtype, compression="gzip")
            f.create_dataset("labels", data=labels, dtype=labels.dtype, compression="gzip")

    # Remove the extracted archive; its absence marks the conversion as complete.
    shutil.rmtree(extracted_dir)

    return data_dir
Download the PNAS Arabidopsis dataset.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded and pre-processed.
def
get_pnas_arabidopsis_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
def get_pnas_arabidopsis_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
    """Get paths to the PNAS Arabidopsis data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the volumetric data, in natural sort order.
    """
    data_dir = get_pnas_arabidopsis_data(path, download)
    # glob yields files in arbitrary order; sort them so the volume order is reproducible.
    return natsorted(glob(os.path.join(data_dir, "*.h5")))
Get paths to the PNAS Arabidopsis data.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the volumetric data.
def
get_pnas_arabidopsis_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_pnas_arabidopsis_dataset(
    path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> Dataset:
    """Build the cell segmentation dataset for the PNAS Arabidopsis data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # Each h5 volume holds the image ("raw") and its segmentation ("labels"),
    # so the same file list serves as both the raw and the label source.
    volumes = get_pnas_arabidopsis_paths(path, download)
    return torch_em.default_segmentation_dataset(
        raw_paths=volumes, raw_key="raw",
        label_paths=volumes, label_key="labels",
        patch_shape=patch_shape, is_seg_dataset=True,
        **kwargs
    )
Get the PNAS Arabidopsis dataset for cell segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_pnas_arabidopsis_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_pnas_arabidopsis_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs
) -> DataLoader:
    """Get the PNAS Arabidopsis dataloader for cell segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for `torch_em.get_data_loader`; they are separated with `util.split_kwargs`.

    Returns:
        The DataLoader.
    """
    # Separate the dataset arguments from the ones meant for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_pnas_arabidopsis_dataset(path, patch_shape, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
Get the PNAS Arabidopsis dataloader for cell segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for torch_em.get_data_loader.
Returns:
The DataLoader.