torch_em.data.datasets.light_microscopy.cvz_fluo

The CVZ-Fluo dataset contains annotations for cell and nuclei segmentation in fluorescence microscopy images.

The dataset is from the publication https://doi.org/10.1038/s41597-023-02108-z. Please cite it if you use this dataset for your research.

View Source

  1"""The CVZ-Fluo dataset contains annotations for cell and nuclei segmentation in
  2fluorescence microscopy images.
  3
  4The dataset is from the publication https://doi.org/10.1038/s41597-023-02108-z.
  5Please cite it if you use this dataset for your research.
  6"""
  7
  8import os
  9from glob import glob
 10from tqdm import tqdm
 11from pathlib import Path
 12from natsort import natsorted
 13from typing import Union, Literal, Tuple, Optional, List
 14
 15import imageio.v3 as imageio
 16from skimage.measure import label as connected_components
 17
 18from torch.utils.data import Dataset, DataLoader
 19
 20import torch_em
 21
 22from .. import util
 23from .neurips_cell_seg import to_rgb
 24
 25
# Synapse project page of the CVZ-Fluo dataset (entity id 'syn27624812').
URL = "https://www.synapse.org/Synapse:syn27624812/"
 27
 28
 29def get_cvz_fluo_data(path: Union[os.PathLike, str], download: bool = False):
 30    """Download the CVZ-Fluo dataset.
 31
 32    Args:
 33        path: Filepath to a folder where the downloaded data is saved.
 34        download: Whether to download the data if it is not present.
 35    """
 36    data_dir = os.path.join(path, r"Annotation Panel Table.xlsx")
 37    if not os.path.exists(data_dir):
 38        os.makedirs(path, exist_ok=True)
 39        # Download the dataset from 'synapse'.
 40        util.download_source_synapse(path=path, entity="syn27624812", download=download)
 41
 42    return
 43
 44
 45def _preprocess_labels(label_paths):
 46    neu_label_paths = []
 47    for lpath in tqdm(label_paths, desc="Preprocessing labels"):
 48        neu_lpath = lpath.replace(".png", ".tif")
 49        neu_label_paths.append(neu_lpath)
 50        if os.path.exists(neu_lpath):
 51            continue
 52
 53        if not os.path.exists(lpath):  # HACK: some paths have weird spacing nomenclature.
 54            lpath = Path(lpath).parent / rf" {os.path.basename(lpath)}"
 55
 56        label = imageio.imread(lpath)
 57        imageio.imwrite(neu_lpath, connected_components(label).astype(label.dtype), compression="zlib")
 58
 59    return neu_label_paths
 60
 61
 62def get_cvz_fluo_paths(
 63    path: Union[os.PathLike, str],
 64    stain_choice: Literal["cell", "dapi"],
 65    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
 66    download: bool = False,
 67) -> Tuple[List[str], List[str]]:
 68    """Get paths to the CVZ-Fluo data.
 69
 70    Args:
 71        path: Filepath to a folder where the downloaded data will be saved.
 72        download: Whether to download the data if it is not present.
 73
 74    Returns:
 75        List of filepaths for the image data.
 76        List of filepaths for the label data.
 77    """
 78    get_cvz_fluo_data(path, download)
 79
 80    if data_choice is None:
 81        data_choice = "**"
 82    else:
 83        if data_choice == "Zeiss" and stain_choice == "dapi":
 84            raise ValueError("'Zeiss' data does not have DAPI stained images.")
 85
 86        data_choice = f"{data_choice}/**"
 87
 88    if stain_choice not in ["cell", "dapi"]:
 89        raise ValueError(f"'{stain_choice}' is not a valid stain choice.")
 90
 91    raw_paths = natsorted(
 92        glob(os.path.join(path, data_choice, f"*-Crop_{stain_choice.title()}_Png.png"), recursive=True)
 93    )
 94    label_paths = [p.replace("_Png.png", "_Mask_Png.png") for p in raw_paths]
 95    label_paths = _preprocess_labels(label_paths)
 96
 97    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0
 98
 99    return raw_paths, label_paths
100
101
def get_cvz_fluo_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the CVZ-Fluo dataset for segmenting cells or nuclei.

    Unless given in `kwargs`, `raw_transform` defaults to `torch_em.transform.get_raw_transform`
    with `to_rgb` as `augmentation2`, and `transform` defaults to the 2d augmentations from
    `torch_em.transform.get_augmentations`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, mask_paths = get_cvz_fluo_paths(
        path=path, stain_choice=stain_choice, data_choice=data_choice, download=download
    )

    # Only fill in the transforms that the caller did not provide.
    dataset_kwargs = dict(kwargs)
    if "raw_transform" not in dataset_kwargs:
        dataset_kwargs["raw_transform"] = torch_em.transform.get_raw_transform(augmentation2=to_rgb)
    if "transform" not in dataset_kwargs:
        dataset_kwargs["transform"] = torch_em.transform.get_augmentations(ndim=2)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=mask_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        **dataset_kwargs
    )
    return dataset
140
141
def get_cvz_fluo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the CVZ-Fluo dataloader for segmenting cells or nuclei.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the ones meant for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    cvz_dataset = get_cvz_fluo_dataset(
        path=path,
        patch_shape=patch_shape,
        stain_choice=stain_choice,
        data_choice=data_choice,
        download=download,
        **dataset_kwargs
    )
    loader = torch_em.get_data_loader(cvz_dataset, batch_size, **dataloader_kwargs)
    return loader

URL = 'https://www.synapse.org/Synapse:syn27624812/'

def get_cvz_fluo_data(path: Union[os.PathLike, str], download: bool = False): View Source

def get_cvz_fluo_data(path: Union[os.PathLike, str], download: bool = False):
    """Make sure the CVZ-Fluo dataset is available in the given folder.

    The file 'Annotation Panel Table.xlsx' directly inside `path` serves as the marker
    for an existing download: if it is present, nothing is done.

    Args:
        path: Filepath to a folder where the downloaded data is saved.
        download: Whether to download the data if it is not present.
    """
    marker_file = os.path.join(path, r"Annotation Panel Table.xlsx")
    if os.path.exists(marker_file):
        return

    os.makedirs(path, exist_ok=True)
    # The data is hosted on 'synapse' and fetched from there.
    util.download_source_synapse(path=path, entity="syn27624812", download=download)

Download the CVZ-Fluo dataset.

Arguments:

path: Filepath to a folder where the downloaded data is saved.
download: Whether to download the data if it is not present.

def get_cvz_fluo_paths( path: Union[os.PathLike, str], stain_choice: Literal['cell', 'dapi'], data_choice: Optional[Literal['CODEX', 'Vectra', 'Zeiss']] = None, download: bool = False) -> Tuple[List[str], List[str]]: View Source

 63def get_cvz_fluo_paths(
 64    path: Union[os.PathLike, str],
 65    stain_choice: Literal["cell", "dapi"],
 66    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
 67    download: bool = False,
 68) -> Tuple[List[str], List[str]]:
 69    """Get paths to the CVZ-Fluo data.
 70
 71    Args:
 72        path: Filepath to a folder where the downloaded data will be saved.
 73        download: Whether to download the data if it is not present.
 74
 75    Returns:
 76        List of filepaths for the image data.
 77        List of filepaths for the label data.
 78    """
 79    get_cvz_fluo_data(path, download)
 80
 81    if data_choice is None:
 82        data_choice = "**"
 83    else:
 84        if data_choice == "Zeiss" and stain_choice == "dapi":
 85            raise ValueError("'Zeiss' data does not have DAPI stained images.")
 86
 87        data_choice = f"{data_choice}/**"
 88
 89    if stain_choice not in ["cell", "dapi"]:
 90        raise ValueError(f"'{stain_choice}' is not a valid stain choice.")
 91
 92    raw_paths = natsorted(
 93        glob(os.path.join(path, data_choice, f"*-Crop_{stain_choice.title()}_Png.png"), recursive=True)
 94    )
 95    label_paths = [p.replace("_Png.png", "_Mask_Png.png") for p in raw_paths]
 96    label_paths = _preprocess_labels(label_paths)
 97
 98    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0
 99
100    return raw_paths, label_paths

Get paths to the CVZ-Fluo data.

Arguments:

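path: Filepath to a folder where the downloaded data will be saved.
stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
data_choice: The choice of dataset.
download: Whether to download the data if it is not present.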

Returns:

A tuple of two lists: the filepaths for the image data and the filepaths for the label data.

def get_cvz_fluo_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], stain_choice: Literal['cell', 'dapi'], data_choice: Optional[Literal['CODEX', 'Vectra', 'Zeiss']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

def get_cvz_fluo_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the CVZ-Fluo dataset for segmenting cells or nuclei.

    Unless given in `kwargs`, `raw_transform` defaults to `torch_em.transform.get_raw_transform`
    with `to_rgb` as `augmentation2`, and `transform` defaults to the 2d augmentations from
    `torch_em.transform.get_augmentations`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, mask_paths = get_cvz_fluo_paths(
        path=path, stain_choice=stain_choice, data_choice=data_choice, download=download
    )

    # Only fill in the transforms that the caller did not provide.
    dataset_kwargs = dict(kwargs)
    if "raw_transform" not in dataset_kwargs:
        dataset_kwargs["raw_transform"] = torch_em.transform.get_raw_transform(augmentation2=to_rgb)
    if "transform" not in dataset_kwargs:
        dataset_kwargs["transform"] = torch_em.transform.get_augmentations(ndim=2)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=mask_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        **dataset_kwargs
    )
    return dataset

Get the CVZ-Fluo dataset for cell and nucleus segmentation.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
patch_shape: The patch shape to use for training.
stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
data_choice: The choice of dataset.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_cvz_fluo_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], stain_choice: Literal['cell', 'dapi'], data_choice: Optional[Literal['CODEX', 'Vectra', 'Zeiss']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_cvz_fluo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the CVZ-Fluo dataloader for segmenting cells or nuclei.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the ones meant for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    cvz_dataset = get_cvz_fluo_dataset(
        path=path,
        patch_shape=patch_shape,
        stain_choice=stain_choice,
        data_choice=data_choice,
        download=download,
        **dataset_kwargs
    )
    loader = torch_em.get_data_loader(cvz_dataset, batch_size, **dataloader_kwargs)
    return loader

Get the CVZ-Fluo dataloader for cell and nucleus segmentation.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
batch_size: The batch size for training.
patch_shape: The patch shape to use for training.
stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
data_choice: The choice of dataset.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.