torch_em.data.datasets.light_microscopy.cvz_fluo

The CVZ-Fluo dataset contains annotations for cell and nuclei segmentation in fluorescence microscopy images.

The dataset is from the publication https://doi.org/10.1038/s41597-023-02108-z. Please cite it if you use this dataset for your research.

  1"""The CVZ-Fluo dataset contains annotations for cell and nuclei segmentation in
  2fluorescence microscopy images.
  3
  4The dataset is from the publication https://doi.org/10.1038/s41597-023-02108-z.
  5Please cite it if you use this dataset for your research.
  6"""
  7
  8import os
  9from glob import glob
 10from tqdm import tqdm
 11from pathlib import Path
 12from natsort import natsorted
 13from typing import Union, Literal, Tuple, Optional, List
 14
 15import imageio.v3 as imageio
 16from skimage.measure import label as connected_components
 17
 18from torch.utils.data import Dataset, DataLoader
 19
 20import torch_em
 21
 22from .. import util
 23from .neurips_cell_seg import to_rgb
 24
 25
 26URL = "https://www.synapse.org/Synapse:syn27624812/"
 27
 28
 29def get_cvz_fluo_data(path: Union[os.PathLike, str], download: bool = False):
 30    """Download the CVZ-Fluo dataset.
 31
 32    Args:
 33        path: Filepath to a folder where the downloaded data is saved.
 34        download: Whether to download the data if it is not present.
 35    """
 36    data_dir = os.path.join(path, r"Annotation Panel Table.xlsx")
 37    if not os.path.exists(data_dir):
 38        os.makedirs(path, exist_ok=True)
 39        # Download the dataset from 'synapse'.
 40        util.download_source_synapse(path=path, entity="syn27624812", download=download)
 41
 42    return
 43
 44
 45def _preprocess_labels(label_paths):
 46    neu_label_paths, to_process = [], []
 47
 48    # First, make simple checks to avoid redundant progress bar runs.
 49    for lpath in label_paths:
 50        neu_lpath = lpath.replace(".png", ".tif")
 51        neu_label_paths.append(neu_lpath)
 52
 53        if not os.path.exists(neu_lpath):
 54            to_process.append((lpath, neu_lpath))
 55
 56    if to_process:  # Next, process valid inputs.
 57        for lpath, neu_lpath in tqdm(to_process, desc="Preprocessing labels"):
 58            if not os.path.exists(lpath):  # HACK: Some paths have weird spacing nomenclature.
 59                lpath = Path(lpath).parent / rf" {os.path.basename(lpath)}"
 60
 61            label = imageio.imread(lpath)
 62            imageio.imwrite(neu_lpath, connected_components(label).astype(label.dtype), compression="zlib")
 63
 64    return neu_label_paths
 65
 66
 67def get_cvz_fluo_paths(
 68    path: Union[os.PathLike, str],
 69    stain_choice: Literal["cell", "dapi"],
 70    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
 71    download: bool = False,
 72) -> Tuple[List[str], List[str]]:
 73    """Get paths to the CVZ-Fluo data.
 74
 75    Args:
 76        path: Filepath to a folder where the downloaded data will be saved.
 77        download: Whether to download the data if it is not present.
 78
 79    Returns:
 80        List of filepaths for the image data.
 81        List of filepaths for the label data.
 82    """
 83    get_cvz_fluo_data(path, download)
 84
 85    if data_choice is None:
 86        data_choice = "**"
 87    else:
 88        if data_choice == "Zeiss" and stain_choice == "dapi":
 89            raise ValueError("'Zeiss' data does not have DAPI stained images.")
 90
 91        data_choice = f"{data_choice}/**"
 92
 93    if stain_choice not in ["cell", "dapi"]:
 94        raise ValueError(f"'{stain_choice}' is not a valid stain choice.")
 95
 96    raw_paths = natsorted(
 97        glob(os.path.join(path, data_choice, f"*-Crop_{stain_choice.title()}_Png.png"), recursive=True)
 98    )
 99    label_paths = [p.replace("_Png.png", "_Mask_Png.png") for p in raw_paths]
100    label_paths = _preprocess_labels(label_paths)
101
102    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0
103
104    return raw_paths, label_paths
105
106
def get_cvz_fluo_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the CVZ-Fluo dataset for cell and nucleus segmentation.

    Unless the caller passes its own "raw_transform" or "transform", a raw
    transform that applies `to_rgb` and the default 2d augmentations are used.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_files, mask_files = get_cvz_fluo_paths(path, stain_choice, data_choice, download)

    # The defaults are built lazily, i.e. only if the caller did not provide the respective transform.
    default_factories = {
        "raw_transform": lambda: torch_em.transform.get_raw_transform(augmentation2=to_rgb),
        "transform": lambda: torch_em.transform.get_augmentations(ndim=2),
    }
    for name, make_default in default_factories.items():
        if name not in kwargs:
            kwargs[name] = make_default()

    fixed_arguments = {
        "raw_paths": image_files,
        "raw_key": None,
        "label_paths": mask_files,
        "label_key": None,
        "is_seg_dataset": False,
        "patch_shape": patch_shape,
    }
    return torch_em.default_segmentation_dataset(**fixed_arguments, **kwargs)
145
146
def get_cvz_fluo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the CVZ-Fluo dataloader for cell and nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the ones passed on to the data loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_cvz_fluo_dataset(
        path=path,
        patch_shape=patch_shape,
        stain_choice=stain_choice,
        data_choice=data_choice,
        download=download,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)
URL = 'https://www.synapse.org/Synapse:syn27624812/'
def get_cvz_fluo_data(path: Union[os.PathLike, str], download: bool = False):
def get_cvz_fluo_data(path: Union[os.PathLike, str], download: bool = False):
    """Make sure the CVZ-Fluo dataset is available in the given folder.

    The file "Annotation Panel Table.xlsx" inside `path` is used as the marker
    for an existing copy of the data: if it is present, nothing is done.
    Otherwise the folder is created (if needed) and the synapse entity
    "syn27624812" is requested via `util.download_source_synapse`.

    Args:
        path: Filepath to a folder where the downloaded data is saved.
        download: Whether to download the data if it is not present.
    """
    marker_file = os.path.join(path, r"Annotation Panel Table.xlsx")
    if os.path.exists(marker_file):
        return  # The data is already in place.

    os.makedirs(path, exist_ok=True)
    # Download the dataset from 'synapse'.
    util.download_source_synapse(path=path, entity="syn27624812", download=download)

Download the CVZ-Fluo dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data is saved.
  • download: Whether to download the data if it is not present.
def get_cvz_fluo_paths( path: Union[os.PathLike, str], stain_choice: Literal['cell', 'dapi'], data_choice: Optional[Literal['CODEX', 'Vectra', 'Zeiss']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
 68def get_cvz_fluo_paths(
 69    path: Union[os.PathLike, str],
 70    stain_choice: Literal["cell", "dapi"],
 71    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
 72    download: bool = False,
 73) -> Tuple[List[str], List[str]]:
 74    """Get paths to the CVZ-Fluo data.
 75
 76    Args:
 77        path: Filepath to a folder where the downloaded data will be saved.
 78        download: Whether to download the data if it is not present.
 79
 80    Returns:
 81        List of filepaths for the image data.
 82        List of filepaths for the label data.
 83    """
 84    get_cvz_fluo_data(path, download)
 85
 86    if data_choice is None:
 87        data_choice = "**"
 88    else:
 89        if data_choice == "Zeiss" and stain_choice == "dapi":
 90            raise ValueError("'Zeiss' data does not have DAPI stained images.")
 91
 92        data_choice = f"{data_choice}/**"
 93
 94    if stain_choice not in ["cell", "dapi"]:
 95        raise ValueError(f"'{stain_choice}' is not a valid stain choice.")
 96
 97    raw_paths = natsorted(
 98        glob(os.path.join(path, data_choice, f"*-Crop_{stain_choice.title()}_Png.png"), recursive=True)
 99    )
100    label_paths = [p.replace("_Png.png", "_Mask_Png.png") for p in raw_paths]
101    label_paths = _preprocess_labels(label_paths)
102
103    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0
104
105    return raw_paths, label_paths

Get paths to the CVZ-Fluo data.

Arguments:
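  • path: Filepath to a folder where the downloaded data will be saved.
  • stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
  • data_choice: The choice of dataset. Either "CODEX", "Vectra" or "Zeiss". By default (None), all of them are used.
  • download: Whether to download the data if it is not present.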
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_cvz_fluo_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], stain_choice: Literal['cell', 'dapi'], data_choice: Optional[Literal['CODEX', 'Vectra', 'Zeiss']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_cvz_fluo_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the CVZ-Fluo dataset for cell and nucleus segmentation.

    Unless the caller passes its own "raw_transform" or "transform", a raw
    transform that applies `to_rgb` and the default 2d augmentations are used.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_files, mask_files = get_cvz_fluo_paths(path, stain_choice, data_choice, download)

    # The defaults are built lazily, i.e. only if the caller did not provide the respective transform.
    default_factories = {
        "raw_transform": lambda: torch_em.transform.get_raw_transform(augmentation2=to_rgb),
        "transform": lambda: torch_em.transform.get_augmentations(ndim=2),
    }
    for name, make_default in default_factories.items():
        if name not in kwargs:
            kwargs[name] = make_default()

    fixed_arguments = {
        "raw_paths": image_files,
        "raw_key": None,
        "label_paths": mask_files,
        "label_key": None,
        "is_seg_dataset": False,
        "patch_shape": patch_shape,
    }
    return torch_em.default_segmentation_dataset(**fixed_arguments, **kwargs)

Get the CVZ-Fluo dataset for cell and nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
  • data_choice: The choice of dataset.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_cvz_fluo_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], stain_choice: Literal['cell', 'dapi'], data_choice: Optional[Literal['CODEX', 'Vectra', 'Zeiss']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_cvz_fluo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the CVZ-Fluo dataloader for cell and nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the ones passed on to the data loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_cvz_fluo_dataset(
        path=path,
        patch_shape=patch_shape,
        stain_choice=stain_choice,
        data_choice=data_choice,
        download=download,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)

Get the CVZ-Fluo dataloader for cell and nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
  • data_choice: The choice of dataset.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.