torch_em.data.datasets.light_microscopy.vibrio_cholerae

The Vibrio Cholerae dataset contains 3D confocal fluorescence microscopy images of Vibrio cholerae biofilms with instance segmentation annotations for single-cell segmentation.

The dataset provides two annotation types for 5 biofilm volumes:

  • semi-manual-annotation: all 5 volumes labeled via automated segmentation + manual correction.
  • fully-manual-annotation: 1 cropped volume (biofilm_1) with fully manual annotations — intended as a held-out evaluation set.

NOTE: The semi-manual labels are used by default for training. Whether all cells in each volume are annotated should be verified against the paper before assuming dense coverage.

The dataset is located at https://zenodo.org/records/7704410. This dataset is from the publication https://doi.org/10.1111/mmi.15064. Please cite it if you use this dataset in your research.

  1"""The Vibrio Cholerae dataset contains 3D confocal fluorescence microscopy images
  2of Vibrio cholerae biofilms with instance segmentation annotations for single-cell
  3segmentation.
  4
  5The dataset provides two annotation types for 5 biofilm volumes:
  6- semi-manual-annotation: all 5 volumes labeled via automated segmentation + manual correction.
  7- fully-manual-annotation: 1 cropped volume (biofilm_1) with fully manual annotations —
  8  intended as a held-out evaluation set.
  9
 10NOTE: The semi-manual labels are used by default for training. Whether all cells in each
 11volume are annotated should be verified against the paper before assuming dense coverage.
 12
 13The dataset is located at https://zenodo.org/records/7704410.
 14This dataset is from the publication https://doi.org/10.1111/mmi.15064.
 15Please cite it if you use this dataset in your research.
 16"""
 17
 18import os
 19from glob import glob
 20from natsort import natsorted
 21from typing import List, Tuple, Union
 22
 23from torch.utils.data import Dataset, DataLoader
 24
 25import torch_em
 26
 27from .. import util
 28
 29
# Download location of the zipped dataset on Zenodo.
URL = "https://zenodo.org/records/7704410/files/ZENODO.zip"
# Expected checksum of the archive; passed to `util.download_source` as `checksum`.
# NOTE(review): 64 hex characters, presumably SHA-256 - confirm against `util.download_source`.
CHECKSUM = "31edb3edbbd308261ead96fa6ec201aff4daf6a0fa8624462c0384e61d67d4c8"
 32
 33
 34def get_vibrio_cholerae_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 35    """Download the Vibrio Cholerae dataset.
 36
 37    Args:
 38        path: Filepath to a folder where the downloaded data will be saved.
 39        download: Whether to download the data if it is not present.
 40
 41    Returns:
 42        The filepath to the training data directory.
 43    """
 44    data_dir = os.path.join(path, "training-data-from-experimentally-acquired-images")
 45    if os.path.exists(data_dir):
 46        return data_dir
 47
 48    os.makedirs(path, exist_ok=True)
 49    zip_path = os.path.join(path, "ZENODO.zip")
 50    util.download_source(zip_path, URL, download, checksum=CHECKSUM)
 51    util.unzip(zip_path, path)
 52
 53    return data_dir
 54
 55
 56def get_vibrio_cholerae_paths(
 57    path: Union[os.PathLike, str], download: bool = False,
 58) -> Tuple[List[str], List[str]]:
 59    """Get paths to the Vibrio Cholerae data.
 60
 61    Args:
 62        path: Filepath to a folder where the downloaded data will be saved.
 63        download: Whether to download the data if it is not present.
 64
 65    Returns:
 66        List of filepaths for the image data.
 67        List of filepaths for the label data.
 68    """
 69    data_dir = get_vibrio_cholerae_data(path, download)
 70
 71    raw_paths = natsorted(glob(os.path.join(data_dir, "raw-data", "*_raw.tif")))
 72    label_paths = natsorted(glob(os.path.join(data_dir, "semi-manual-annotation", "*_labels.tif")))
 73
 74    if len(raw_paths) == 0:
 75        raise RuntimeError(
 76            f"No image files found in {os.path.join(data_dir, 'raw-data')}. "
 77            "Please check the dataset structure."
 78        )
 79    if len(raw_paths) != len(label_paths):
 80        raise RuntimeError(
 81            f"Number of images ({len(raw_paths)}) and labels ({len(label_paths)}) do not match."
 82        )
 83
 84    return raw_paths, label_paths
 85
 86
 87def get_vibrio_cholerae_dataset(
 88    path: Union[os.PathLike, str],
 89    patch_shape: Tuple[int, ...],
 90    download: bool = False,
 91    **kwargs,
 92) -> Dataset:
 93    """Get the Vibrio Cholerae dataset for 3D cell instance segmentation.
 94
 95    Args:
 96        path: Filepath to a folder where the downloaded data will be saved.
 97        patch_shape: The patch shape to use for training.
 98        download: Whether to download the data if it is not present.
 99        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
100
101    Returns:
102        The segmentation dataset.
103    """
104    raw_paths, label_paths = get_vibrio_cholerae_paths(path, download)
105
106    return torch_em.default_segmentation_dataset(
107        raw_paths=raw_paths,
108        raw_key=None,
109        label_paths=label_paths,
110        label_key=None,
111        patch_shape=patch_shape,
112        **kwargs,
113    )
114
115
def get_vibrio_cholerae_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the Vibrio Cholerae dataloader for 3D cell instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    return torch_em.get_data_loader(
        get_vibrio_cholerae_dataset(path, patch_shape, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )
# Download location of the zipped dataset on Zenodo.
URL = 'https://zenodo.org/records/7704410/files/ZENODO.zip'
# Expected checksum of the archive; passed to `util.download_source` as `checksum`.
# NOTE(review): 64 hex characters, presumably SHA-256 - confirm against `util.download_source`.
CHECKSUM = '31edb3edbbd308261ead96fa6ec201aff4daf6a0fa8624462c0384e61d67d4c8'
def get_vibrio_cholerae_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_vibrio_cholerae_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the Vibrio Cholerae data is available locally and return its location.

    If the extracted data folder is not found under `path`, the zip archive is
    requested via `util.download_source` (with `CHECKSUM` passed as the expected
    checksum) and unpacked into `path` via `util.unzip`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data directory.
    """
    target_dir = os.path.join(path, "training-data-from-experimentally-acquired-images")

    # Only fetch and extract the archive when the extracted folder is missing.
    if not os.path.exists(target_dir):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "ZENODO.zip")
        util.download_source(archive, URL, download, checksum=CHECKSUM)
        util.unzip(archive, path)

    return target_dir

Download the Vibrio Cholerae dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the training data directory.

def get_vibrio_cholerae_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
def get_vibrio_cholerae_paths(
    path: Union[os.PathLike, str], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Collect the image and label filepaths of the Vibrio Cholerae data.

    The images are globbed from the "raw-data" folder and the labels from the
    "semi-manual-annotation" folder; both lists are naturally sorted.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        RuntimeError: If no image files are found, or if the number of images
            and labels differs.
    """
    root = get_vibrio_cholerae_data(path, download)

    image_dir = os.path.join(root, "raw-data")
    raw_paths = natsorted(glob(os.path.join(image_dir, "*_raw.tif")))
    label_paths = natsorted(glob(os.path.join(root, "semi-manual-annotation", "*_labels.tif")))

    n_raw, n_labels = len(raw_paths), len(label_paths)

    # An empty image list means the expected folder layout is not present.
    if n_raw == 0:
        raise RuntimeError(
            f"No image files found in {image_dir}. "
            "Please check the dataset structure."
        )
    # Images and labels are matched by sorted position, so their counts must agree.
    if n_raw != n_labels:
        raise RuntimeError(
            f"Number of images ({n_raw}) and labels ({n_labels}) do not match."
        )

    return raw_paths, label_paths

Get paths to the Vibrio Cholerae data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

A tuple of two lists: the filepaths for the image data, followed by the filepaths for the label data.

def get_vibrio_cholerae_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 88def get_vibrio_cholerae_dataset(
 89    path: Union[os.PathLike, str],
 90    patch_shape: Tuple[int, ...],
 91    download: bool = False,
 92    **kwargs,
 93) -> Dataset:
 94    """Get the Vibrio Cholerae dataset for 3D cell instance segmentation.
 95
 96    Args:
 97        path: Filepath to a folder where the downloaded data will be saved.
 98        patch_shape: The patch shape to use for training.
 99        download: Whether to download the data if it is not present.
100        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
101
102    Returns:
103        The segmentation dataset.
104    """
105    raw_paths, label_paths = get_vibrio_cholerae_paths(path, download)
106
107    return torch_em.default_segmentation_dataset(
108        raw_paths=raw_paths,
109        raw_key=None,
110        label_paths=label_paths,
111        label_key=None,
112        patch_shape=patch_shape,
113        **kwargs,
114    )

Get the Vibrio Cholerae dataset for 3D cell instance segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_vibrio_cholerae_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_vibrio_cholerae_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the Vibrio Cholerae dataloader for 3D cell instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    return torch_em.get_data_loader(
        get_vibrio_cholerae_dataset(path, patch_shape, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )

Get the Vibrio Cholerae dataloader for 3D cell instance segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.