torch_em.data.datasets.light_microscopy.cvz_fluo
The CVZ-Fluo dataset contains annotations for cell and nuclei segmentation in fluorescence microscopy images.
The dataset is from the publication https://doi.org/10.1038/s41597-023-02108-z. Please cite it if you use this dataset for your research.
"""The CVZ-Fluo dataset contains annotations for cell and nuclei segmentation in
fluorescence microscopy images.

The dataset is from the publication https://doi.org/10.1038/s41597-023-02108-z.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from tqdm import tqdm
from pathlib import Path
from natsort import natsorted
from typing import Union, Literal, Tuple, Optional, List

import imageio.v3 as imageio
from skimage.measure import label as connected_components

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util
from .neurips_cell_seg import to_rgb


URL = "https://www.synapse.org/Synapse:syn27624812/"


def get_cvz_fluo_data(path: Union[os.PathLike, str], download: bool = False):
    """Download the CVZ-Fluo dataset.

    The download is skipped if the file "Annotation Panel Table.xlsx" already exists in `path`.

    Args:
        path: Filepath to a folder where the downloaded data is saved.
        download: Whether to download the data if it is not present.
    """
    data_dir = os.path.join(path, r"Annotation Panel Table.xlsx")
    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)
        # Download the dataset from 'synapse'.
        util.download_source_synapse(path=path, entity="syn27624812", download=download)


def _preprocess_labels(label_paths):
    """Convert the png label images to connected-component labelings stored as tif.

    The output path of each label image is its input path with ".png" replaced by ".tif".
    Outputs that already exist are not computed again.

    Args:
        label_paths: List of filepaths to the png label images.

    Returns:
        List of filepaths to the tif label images, in the same order as `label_paths`.
    """
    neu_label_paths = []
    for lpath in tqdm(label_paths, desc="Preprocessing labels"):
        neu_lpath = lpath.replace(".png", ".tif")
        neu_label_paths.append(neu_lpath)
        if os.path.exists(neu_lpath):
            continue

        if not os.path.exists(lpath):  # HACK: some paths have weird spacing nomenclature.
            # Fall back to the same filename with a leading space.
            lpath = Path(lpath).parent / rf" {os.path.basename(lpath)}"

        label = imageio.imread(lpath)
        # NOTE(review): the cast back to the input dtype could wrap around if an image has more
        # connected components than this dtype can represent - confirm against the data.
        imageio.imwrite(neu_lpath, connected_components(label).astype(label.dtype), compression="zlib")

    return neu_label_paths


def get_cvz_fluo_paths(
    path: Union[os.PathLike, str],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the CVZ-Fluo data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset. By default (None), all folders in `path` are searched.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `stain_choice` is invalid or if "dapi" is requested for the "Zeiss" data.
        RuntimeError: If no matching images are found in `path`.
    """
    # Validate the arguments first, so that invalid choices fail before any download is attempted.
    if stain_choice not in ["cell", "dapi"]:
        raise ValueError(f"'{stain_choice}' is not a valid stain choice.")

    if data_choice == "Zeiss" and stain_choice == "dapi":
        raise ValueError("'Zeiss' data does not have DAPI stained images.")

    get_cvz_fluo_data(path, download)

    # Search recursively, either in all folders or only below the folder of the chosen dataset.
    search_dir = "**" if data_choice is None else f"{data_choice}/**"
    raw_paths = natsorted(
        glob(os.path.join(path, search_dir, f"*-Crop_{stain_choice.title()}_Png.png"), recursive=True)
    )
    # Raise explicitly instead of using 'assert', which is removed when python runs with '-O'.
    if not raw_paths:
        raise RuntimeError(f"Could not find any '{stain_choice}' images for the CVZ-Fluo dataset at '{path}'.")

    label_paths = [p.replace("_Png.png", "_Mask_Png.png") for p in raw_paths]
    label_paths = _preprocess_labels(label_paths)

    return raw_paths, label_paths


def get_cvz_fluo_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the CVZ-Fluo dataset for cell and nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_cvz_fluo_paths(path, stain_choice, data_choice, download)

    # Only set the default transformations if they are not passed by the user.
    if "raw_transform" not in kwargs:
        kwargs["raw_transform"] = torch_em.transform.get_raw_transform(augmentation2=to_rgb)

    if "transform" not in kwargs:
        kwargs["transform"] = torch_em.transform.get_augmentations(ndim=2)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        **kwargs
    )


def get_cvz_fluo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CVZ-Fluo dataloader for cell and nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the remaining ones, which are passed to the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_cvz_fluo_dataset(path, patch_shape, stain_choice, data_choice, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://www.synapse.org/Synapse:syn27624812/'
def
get_cvz_fluo_data(path: Union[os.PathLike, str], download: bool = False):
def get_cvz_fluo_data(path: Union[os.PathLike, str], download: bool = False):
    """Download the CVZ-Fluo dataset.

    Nothing is done if the file "Annotation Panel Table.xlsx" already exists in `path`.

    Args:
        path: Filepath to a folder where the downloaded data is saved.
        download: Whether to download the data if it is not present.
    """
    table_file = os.path.join(path, r"Annotation Panel Table.xlsx")
    if os.path.exists(table_file):
        return

    os.makedirs(path, exist_ok=True)
    # Download the dataset from 'synapse'.
    util.download_source_synapse(path=path, entity="syn27624812", download=download)
Download the CVZ-Fluo dataset.
Arguments:
- path: Filepath to a folder where the downloaded data is saved.
- download: Whether to download the data if it is not present.
def
get_cvz_fluo_paths( path: Union[os.PathLike, str], stain_choice: Literal['cell', 'dapi'], data_choice: Optional[Literal['CODEX', 'Vectra', 'Zeiss']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_cvz_fluo_paths(
    path: Union[os.PathLike, str],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the CVZ-Fluo data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset. By default (None), all folders in `path` are searched.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `stain_choice` is invalid or if "dapi" is requested for the "Zeiss" data.
        RuntimeError: If no matching images are found in `path`.
    """
    # Validate the arguments first, so that invalid choices fail before any download is attempted.
    if stain_choice not in ["cell", "dapi"]:
        raise ValueError(f"'{stain_choice}' is not a valid stain choice.")

    if data_choice == "Zeiss" and stain_choice == "dapi":
        raise ValueError("'Zeiss' data does not have DAPI stained images.")

    get_cvz_fluo_data(path, download)

    # Search recursively, either in all folders or only below the folder of the chosen dataset.
    search_dir = "**" if data_choice is None else f"{data_choice}/**"
    raw_paths = natsorted(
        glob(os.path.join(path, search_dir, f"*-Crop_{stain_choice.title()}_Png.png"), recursive=True)
    )
    # Raise explicitly instead of using 'assert', which is removed when python runs with '-O'.
    if not raw_paths:
        raise RuntimeError(f"Could not find any '{stain_choice}' images for the CVZ-Fluo dataset at '{path}'.")

    label_paths = [p.replace("_Png.png", "_Mask_Png.png") for p in raw_paths]
    label_paths = _preprocess_labels(label_paths)

    return raw_paths, label_paths
Get paths to the CVZ-Fluo data.
Arguments:
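- path: Filepath to a folder where the downloaded data will be saved.
- stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
- data_choice: The choice of dataset.
- download: Whether to download the data if it is not present.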
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_cvz_fluo_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], stain_choice: Literal['cell', 'dapi'], data_choice: Optional[Literal['CODEX', 'Vectra', 'Zeiss']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_cvz_fluo_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the CVZ-Fluo dataset for cell and nucleus segmentation.

    If "raw_transform" or "transform" are not given in `kwargs`, defaults are created for them.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, mask_paths = get_cvz_fluo_paths(path, stain_choice, data_choice, download)

    dataset_kwargs = dict(kwargs)

    # The default raw transform gets 'to_rgb' as its second augmentation.
    if "raw_transform" not in dataset_kwargs:
        dataset_kwargs["raw_transform"] = torch_em.transform.get_raw_transform(augmentation2=to_rgb)

    # The default augmentations are the 2d ones.
    if "transform" not in dataset_kwargs:
        dataset_kwargs["transform"] = torch_em.transform.get_augmentations(ndim=2)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=mask_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        **dataset_kwargs
    )
    return dataset
Get the CVZ-Fluo dataset for cell and nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
- data_choice: The choice of dataset.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_cvz_fluo_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], stain_choice: Literal['cell', 'dapi'], data_choice: Optional[Literal['CODEX', 'Vectra', 'Zeiss']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_cvz_fluo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CVZ-Fluo dataloader for cell and nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the remaining ones, which are passed to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    cvz_fluo_dataset = get_cvz_fluo_dataset(
        path, patch_shape, stain_choice, data_choice, download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(cvz_fluo_dataset, batch_size, **dataloader_kwargs)
    return loader
Get the CVZ-Fluo dataloader for cell and nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
- data_choice: The choice of dataset.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.