torch_em.data.datasets.light_microscopy.vibrio_cholerae
The Vibrio Cholerae dataset contains 3D confocal fluorescence microscopy images of Vibrio cholerae biofilms with instance segmentation annotations for single-cell segmentation.
The dataset provides two annotation types for 5 biofilm volumes:
- semi-manual-annotation: all 5 volumes labeled via automated segmentation + manual correction.
- fully-manual-annotation: 1 cropped volume (biofilm_1) with fully manual annotations — intended as a held-out evaluation set.
NOTE: The semi-manual labels are used by default for training. Whether all cells in each volume are annotated should be verified against the paper before assuming dense coverage.
The dataset is located at https://zenodo.org/records/7704410. This dataset is from the publication https://doi.org/10.1111/mmi.15064. Please cite it if you use this dataset in your research.
"""The Vibrio Cholerae dataset contains 3D confocal fluorescence microscopy images
of Vibrio cholerae biofilms with instance segmentation annotations for single-cell
segmentation.

The dataset provides two annotation types for 5 biofilm volumes:
- semi-manual-annotation: all 5 volumes labeled via automated segmentation + manual correction.
- fully-manual-annotation: 1 cropped volume (biofilm_1) with fully manual annotations —
  intended as a held-out evaluation set.

NOTE: The semi-manual labels are used by default for training. Whether all cells in each
volume are annotated should be verified against the paper before assuming dense coverage.

The dataset is located at https://zenodo.org/records/7704410.
This dataset is from the publication https://doi.org/10.1111/mmi.15064.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import List, Tuple, Union

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://zenodo.org/records/7704410/files/ZENODO.zip"
CHECKSUM = "31edb3edbbd308261ead96fa6ec201aff4daf6a0fa8624462c0384e61d67d4c8"


def get_vibrio_cholerae_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Locate the Vibrio Cholerae training data, fetching the archive if it is missing.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data directory.
    """
    extracted_root = os.path.join(path, "training-data-from-experimentally-acquired-images")

    # Only touch the network / archive when the extracted folder is not there yet.
    if not os.path.exists(extracted_root):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "ZENODO.zip")
        util.download_source(archive, URL, download, checksum=CHECKSUM)
        util.unzip(archive, path)

    return extracted_root


def get_vibrio_cholerae_paths(
    path: Union[os.PathLike, str], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Collect the image and semi-manual label filepaths of the Vibrio Cholerae data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        RuntimeError: If no image files are found, or if the number of image
            and label files differs.
    """
    root = get_vibrio_cholerae_data(path, download)
    image_dir = os.path.join(root, "raw-data")

    # Both lists are naturally sorted; pairing relies on the two orders lining up.
    raw_paths = natsorted(glob(os.path.join(image_dir, "*_raw.tif")))
    label_paths = natsorted(glob(os.path.join(root, "semi-manual-annotation", "*_labels.tif")))

    if not raw_paths:
        raise RuntimeError(
            f"No image files found in {image_dir}. "
            "Please check the dataset structure."
        )

    n_raw, n_labels = len(raw_paths), len(label_paths)
    if n_raw != n_labels:
        raise RuntimeError(
            f"Number of images ({n_raw}) and labels ({n_labels}) do not match."
        )

    return raw_paths, label_paths


def get_vibrio_cholerae_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Build the Vibrio Cholerae dataset for 3D cell instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_files, label_files = get_vibrio_cholerae_paths(path, download)

    # The files are passed without an internal dataset key (raw_key / label_key are None).
    fixed_args = dict(
        raw_paths=image_files,
        raw_key=None,
        label_paths=label_files,
        label_key=None,
        patch_shape=patch_shape,
    )
    return torch_em.default_segmentation_dataset(**fixed_args, **kwargs)


def get_vibrio_cholerae_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the Vibrio Cholerae dataloader for 3D cell instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    return torch_em.get_data_loader(
        get_vibrio_cholerae_dataset(path, patch_shape, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )
def get_vibrio_cholerae_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Locate the Vibrio Cholerae training data, fetching the archive if it is missing.

    If the extracted training-data folder already exists below `path`, it is
    returned right away. Otherwise `path` is created if needed, the archive is
    requested via `util.download_source` and handed to `util.unzip`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data directory.
    """
    extracted_root = os.path.join(path, "training-data-from-experimentally-acquired-images")

    # Only touch the network / archive when the extracted folder is not there yet.
    if not os.path.exists(extracted_root):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "ZENODO.zip")
        util.download_source(archive, URL, download, checksum=CHECKSUM)
        util.unzip(archive, path)

    return extracted_root
Download the Vibrio Cholerae dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the training data directory.
def get_vibrio_cholerae_paths(
    path: Union[os.PathLike, str], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Collect the image and semi-manual label filepaths of the Vibrio Cholerae data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        RuntimeError: If no image files are found, or if the number of image
            and label files differs.
    """
    root = get_vibrio_cholerae_data(path, download)
    image_dir = os.path.join(root, "raw-data")

    # Both lists are naturally sorted; pairing relies on the two orders lining up.
    raw_paths = natsorted(glob(os.path.join(image_dir, "*_raw.tif")))
    label_paths = natsorted(glob(os.path.join(root, "semi-manual-annotation", "*_labels.tif")))

    if not raw_paths:
        raise RuntimeError(
            f"No image files found in {image_dir}. "
            "Please check the dataset structure."
        )

    n_raw, n_labels = len(raw_paths), len(label_paths)
    if n_raw != n_labels:
        raise RuntimeError(
            f"Number of images ({n_raw}) and labels ({n_labels}) do not match."
        )

    return raw_paths, label_paths
Get paths to the Vibrio Cholerae data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_vibrio_cholerae_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Build the Vibrio Cholerae dataset for 3D cell instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_files, label_files = get_vibrio_cholerae_paths(path, download)

    # The files are passed without an internal dataset key (raw_key / label_key are None).
    fixed_args = dict(
        raw_paths=image_files,
        raw_key=None,
        label_paths=label_files,
        label_key=None,
        patch_shape=patch_shape,
    )
    return torch_em.default_segmentation_dataset(**fixed_args, **kwargs)
Get the Vibrio Cholerae dataset for 3D cell instance segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
def get_vibrio_cholerae_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the Vibrio Cholerae dataloader for 3D cell instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    return torch_em.get_data_loader(
        get_vibrio_cholerae_dataset(path, patch_shape, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )
Get the Vibrio Cholerae dataloader for 3D cell instance segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.