torch_em.data.datasets.light_microscopy.cvz_fluo
The CVZ-Fluo dataset contains annotations for cell and nuclei segmentation in fluorescence microscopy images.
The dataset is from the publication https://doi.org/10.1038/s41597-023-02108-z. Please cite it if you use this dataset for your research.
"""The CVZ-Fluo dataset contains annotations for cell and nuclei segmentation in
fluorescence microscopy images.

The dataset is from the publication https://doi.org/10.1038/s41597-023-02108-z.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from tqdm import tqdm
from pathlib import Path
from natsort import natsorted
from typing import Union, Literal, Tuple, Optional, List

import numpy as np
import imageio.v3 as imageio
from skimage.measure import label as connected_components

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util
from .neurips_cell_seg import to_rgb


URL = "https://www.synapse.org/Synapse:syn27624812/"


def get_cvz_fluo_data(path: Union[os.PathLike, str], download: bool = False):
    """Download the CVZ-Fluo dataset.

    The download is skipped when the annotation spreadsheet is already present in `path`.

    Args:
        path: Filepath to a folder where the downloaded data is saved.
        download: Whether to download the data if it is not present.
    """
    data_dir = os.path.join(path, r"Annotation Panel Table.xlsx")
    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)
        # Download the dataset from 'synapse'.
        util.download_source_synapse(path=path, entity="syn27624812", download=download)


def _preprocess_labels(label_paths):
    """Convert the PNG masks to connected-component labelled TIFF files.

    Each TIFF file is written next to its PNG source and is only created if it does not exist yet.

    Args:
        label_paths: The filepaths to the PNG masks.

    Returns:
        The filepaths to the TIFF label files, in the same order as `label_paths`.
    """
    neu_label_paths, to_process = [], []

    # First, make simple checks to avoid redundant progress bar runs.
    for lpath in label_paths:
        # Only swap the file extension, so that a '.png' elsewhere in the path is left untouched.
        stem, ext = os.path.splitext(lpath)
        neu_lpath = f"{stem}.tif" if ext == ".png" else lpath.replace(".png", ".tif")
        neu_label_paths.append(neu_lpath)

        if not os.path.exists(neu_lpath):
            to_process.append((lpath, neu_lpath))

    if to_process:  # Next, process valid inputs.
        for lpath, neu_lpath in tqdm(to_process, desc="Preprocessing labels"):
            if not os.path.exists(lpath):  # HACK: Some paths have weird spacing nomenclature.
                lpath = Path(lpath).parent / rf" {os.path.basename(lpath)}"

            label = imageio.imread(lpath)
            instances = connected_components(label)

            # Casting back to the dtype of the input image would silently wrap around for images with
            # more objects than that dtype can count, so the dtype is widened whenever this is required.
            max_id = int(instances.max()) if instances.size else 0
            out_dtype = np.promote_types(label.dtype, np.min_scalar_type(max_id))
            imageio.imwrite(neu_lpath, instances.astype(out_dtype), compression="zlib")

    return neu_label_paths


def get_cvz_fluo_paths(
    path: Union[os.PathLike, str],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the CVZ-Fluo data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset. By default (None), all subfolders of `path` are searched.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `stain_choice` is invalid or if "dapi" is requested for the 'Zeiss' data.
        AssertionError: If no matching images are found.
    """
    # Validate the arguments up front, so that invalid input fails before a download is triggered.
    if stain_choice not in ["cell", "dapi"]:
        raise ValueError(f"'{stain_choice}' is not a valid stain choice.")

    if data_choice == "Zeiss" and stain_choice == "dapi":
        raise ValueError("'Zeiss' data does not have DAPI stained images.")

    get_cvz_fluo_data(path, download)

    search_dir = "**" if data_choice is None else f"{data_choice}/**"
    raw_paths = natsorted(
        glob(os.path.join(path, search_dir, f"*-Crop_{stain_choice.title()}_Png.png"), recursive=True)
    )
    label_paths = [p.replace("_Png.png", "_Mask_Png.png") for p in raw_paths]
    label_paths = _preprocess_labels(label_paths)

    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0, (
        f"Could not find matching images and labels for stain_choice='{stain_choice}' in '{path}'."
    )

    return raw_paths, label_paths


def get_cvz_fluo_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the CVZ-Fluo dataset for cell and nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_cvz_fluo_paths(path, stain_choice, data_choice, download)

    # The default transforms are only set if the caller did not pass their own.
    if "raw_transform" not in kwargs:
        kwargs["raw_transform"] = torch_em.transform.get_raw_transform(augmentation2=to_rgb)

    if "transform" not in kwargs:
        kwargs["transform"] = torch_em.transform.get_augmentations(ndim=2)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        **kwargs
    )


def get_cvz_fluo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CVZ-Fluo dataloader for cell and nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_cvz_fluo_dataset(path, patch_shape, stain_choice, data_choice, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://www.synapse.org/Synapse:syn27624812/'
def
get_cvz_fluo_data(path: Union[os.PathLike, str], download: bool = False):
def get_cvz_fluo_data(path: Union[os.PathLike, str], download: bool = False):
    """Make sure that the CVZ-Fluo dataset is available in the given folder.

    The annotation spreadsheet inside of `path` serves as the marker for existing data:
    nothing is done if it is found.

    Args:
        path: Filepath to a folder where the downloaded data is saved.
        download: Whether to download the data if it is not present.
    """
    marker_file = os.path.join(path, r"Annotation Panel Table.xlsx")
    if os.path.exists(marker_file):
        return

    os.makedirs(path, exist_ok=True)
    # The data is hosted on 'synapse'.
    util.download_source_synapse(path=path, entity="syn27624812", download=download)
Download the CVZ-Fluo dataset.
Arguments:
- path: Filepath to a folder where the downloaded data is saved.
- download: Whether to download the data if it is not present.
def
get_cvz_fluo_paths( path: Union[os.PathLike, str], stain_choice: Literal['cell', 'dapi'], data_choice: Optional[Literal['CODEX', 'Vectra', 'Zeiss']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_cvz_fluo_paths(
    path: Union[os.PathLike, str],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the CVZ-Fluo data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset. By default (None), all subfolders of `path` are searched.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `stain_choice` is invalid or if "dapi" is requested for the 'Zeiss' data.
        AssertionError: If no matching images are found.
    """
    # Validate the arguments up front, so that invalid input fails before a download is triggered.
    if stain_choice not in ["cell", "dapi"]:
        raise ValueError(f"'{stain_choice}' is not a valid stain choice.")

    if data_choice == "Zeiss" and stain_choice == "dapi":
        raise ValueError("'Zeiss' data does not have DAPI stained images.")

    get_cvz_fluo_data(path, download)

    search_dir = "**" if data_choice is None else f"{data_choice}/**"
    raw_paths = natsorted(
        glob(os.path.join(path, search_dir, f"*-Crop_{stain_choice.title()}_Png.png"), recursive=True)
    )
    label_paths = [p.replace("_Png.png", "_Mask_Png.png") for p in raw_paths]
    label_paths = _preprocess_labels(label_paths)

    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0, (
        f"Could not find matching images and labels for stain_choice='{stain_choice}' in '{path}'."
    )

    return raw_paths, label_paths
Get paths to the CVZ-Fluo data.
Arguments:
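- path: Filepath to a folder where the downloaded data will be saved.
- stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
- data_choice: The choice of dataset.
- download: Whether to download the data if it is not present.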
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_cvz_fluo_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], stain_choice: Literal['cell', 'dapi'], data_choice: Optional[Literal['CODEX', 'Vectra', 'Zeiss']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_cvz_fluo_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the CVZ-Fluo dataset for cell and nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, mask_paths = get_cvz_fluo_paths(path, stain_choice, data_choice, download)

    # The default transforms are created lazily and only for the entries the caller did not provide.
    default_factories = {
        "raw_transform": lambda: torch_em.transform.get_raw_transform(augmentation2=to_rgb),
        "transform": lambda: torch_em.transform.get_augmentations(ndim=2),
    }
    for name, make_default in default_factories.items():
        if name not in kwargs:
            kwargs[name] = make_default()

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=mask_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
Get the CVZ-Fluo dataset for cell and nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
- data_choice: The choice of dataset.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
def
get_cvz_fluo_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], stain_choice: Literal['cell', 'dapi'], data_choice: Optional[Literal['CODEX', 'Vectra', 'Zeiss']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_cvz_fluo_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    stain_choice: Literal["cell", "dapi"],
    data_choice: Optional[Literal["CODEX", "Vectra", "Zeiss"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the CVZ-Fluo dataloader for cell and nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
        data_choice: The choice of dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the ones meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_cvz_fluo_dataset(
        path, patch_shape, stain_choice, data_choice, download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)
    return loader
Get the CVZ-Fluo dataloader for cell and nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- stain_choice: Decides for annotations based on staining. Either "cell" (for cells) or "dapi" (for nuclei).
- data_choice: The choice of dataset.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.