torch_em.data.datasets.light_microscopy.covid_if
This dataset contains annotations for cell and nucleus segmentation in immunofluorescence microscopy.
This dataset is from the publication https://doi.org/10.1002/bies.202000257. Please cite it if you use this dataset in your research.
"""This dataset contains annotations for cell and nucleus segmentation
in immunofluorescence microscopy.

This dataset is from the publication https://doi.org/10.1002/bies.202000257.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from typing import List, Optional, Tuple, Union

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


COVID_IF_URL = "https://zenodo.org/record/5092850/files/covid-if-groundtruth.zip?download=1"
CHECKSUM = "d9cd6c85a19b802c771fb4ff928894b19a8fab0e0af269c49235fdac3f7a60e1"


def get_covid_if_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the Covid-IF training data.

    The data counts as present when `path` already contains at least one `.h5` file.
    Checking for the files rather than for the folder matters because the folder is
    created before the download starts: a download that fails or is interrupted
    leaves an empty folder behind, which must not be mistaken for a finished
    download on the next call.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.
    """
    # Same pattern that `get_covid_if_paths` uses to collect the samples.
    if glob(os.path.join(path, "*.h5")):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "covid-if.zip")
    util.download_source(zip_path, COVID_IF_URL, download, CHECKSUM)
    util.unzip(zip_path, path, True)

    return path


def get_covid_if_paths(
    path: Union[os.PathLike, str],
    sample_range: Optional[Tuple[int, int]] = None,
    download: bool = False
) -> List[str]:
    """Get paths to the Covid-IF data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        sample_range: Id range of samples to load from the training dataset.
            A `None` start is replaced by 0, a `None` stop by the number of
            `.h5` files found in `path`.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the stored data.
    """
    get_covid_if_data(path, download)

    all_files = sorted(glob(os.path.join(path, "*.h5")))
    if sample_range is None:
        return all_files

    first, last = sample_range
    first = 0 if first is None else first
    last = len(all_files) if last is None else last

    # The requested ids are mapped onto the fixed file naming scheme of the dataset.
    selected = [os.path.join(path, f"gt_image_{sample_id:03}.h5") for sample_id in range(first, last)]
    assert all(os.path.exists(sample) for sample in selected), f"Invalid sample range {sample_range}"

    return selected


def get_covid_if_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> Dataset:
    """Get the Covid-IF dataset for segmenting nuclei or cells in immunofluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        ValueError: If `target` is neither 'cells' nor 'nuclei'.
    """
    # TODO also support infected_cells
    available_targets = ("cells", "nuclei")
    if target not in available_targets:
        raise ValueError(f"{target} not found in {available_targets}")

    # Cells are segmented on the serum IgG channel, nuclei on the nucleus channel.
    if target == "cells":
        raw_key, label_key = "raw/serum_IgG/s0", "labels/cells/s0"
    else:
        raw_key, label_key = "raw/nuclei/s0", "labels/nuclei/s0"

    sample_paths = get_covid_if_paths(path, sample_range, download)

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
    )
    kwargs = util.update_kwargs(kwargs, "ndim", 2)

    # Raw data and labels are stored in the same files under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=sample_paths,
        raw_key=raw_key,
        label_paths=sample_paths,
        label_key=label_key,
        patch_shape=patch_shape,
        **kwargs
    )


def get_covid_if_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Get the Covid-IF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    covid_if_dataset = get_covid_if_dataset(
        path,
        patch_shape,
        sample_range=sample_range,
        target=target,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(covid_if_dataset, batch_size=batch_size, **dataloader_kwargs)
COVID_IF_URL = 'https://zenodo.org/record/5092850/files/covid-if-groundtruth.zip?download=1'
CHECKSUM = 'd9cd6c85a19b802c771fb4ff928894b19a8fab0e0af269c49235fdac3f7a60e1'
def get_covid_if_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_covid_if_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the Covid-IF training data.

    The data counts as present when `path` already contains at least one `.h5` file.
    Checking for the files rather than for the folder matters because the folder is
    created before the download starts: a download that fails or is interrupted
    leaves an empty folder behind, which must not be mistaken for a finished
    download on the next call.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.
    """
    # Same pattern that `get_covid_if_paths` uses to collect the samples.
    if glob(os.path.join(path, "*.h5")):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "covid-if.zip")
    util.download_source(zip_path, COVID_IF_URL, download, CHECKSUM)
    util.unzip(zip_path, path, True)

    return path
Download the Covid-IF training data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the training data.
def get_covid_if_paths(path: Union[os.PathLike, str], sample_range: Optional[Tuple[int, int]] = None, download: bool = False) -> List[str]:
def get_covid_if_paths(
    path: Union[os.PathLike, str],
    sample_range: Optional[Tuple[int, int]] = None,
    download: bool = False
) -> List[str]:
    """Get paths to the Covid-IF data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        sample_range: Id range of samples to load from the training dataset.
            A `None` start is replaced by 0, a `None` stop by the number of
            `.h5` files found in `path`.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the stored data.
    """
    get_covid_if_data(path, download)

    all_files = sorted(glob(os.path.join(path, "*.h5")))
    if sample_range is None:
        return all_files

    first, last = sample_range
    first = 0 if first is None else first
    last = len(all_files) if last is None else last

    # The requested ids are mapped onto the fixed file naming scheme of the dataset.
    selected = [os.path.join(path, f"gt_image_{sample_id:03}.h5") for sample_id in range(first, last)]
    assert all(os.path.exists(sample) for sample in selected), f"Invalid sample range {sample_range}"

    return selected
Get paths to the Covid-IF data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- sample_range: Id range of samples to load from the training dataset.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths to the stored data.
def get_covid_if_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, int], sample_range: Optional[Tuple[int, int]] = None, target: str = 'cells', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_covid_if_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> Dataset:
    """Get the Covid-IF dataset for segmenting nuclei or cells in immunofluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        ValueError: If `target` is neither 'cells' nor 'nuclei'.
    """
    # TODO also support infected_cells
    available_targets = ("cells", "nuclei")
    if target not in available_targets:
        raise ValueError(f"{target} not found in {available_targets}")

    # Cells are segmented on the serum IgG channel, nuclei on the nucleus channel.
    if target == "cells":
        raw_key, label_key = "raw/serum_IgG/s0", "labels/cells/s0"
    else:
        raw_key, label_key = "raw/nuclei/s0", "labels/nuclei/s0"

    sample_paths = get_covid_if_paths(path, sample_range, download)

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
    )
    kwargs = util.update_kwargs(kwargs, "ndim", 2)

    # Raw data and labels are stored in the same files under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=sample_paths,
        raw_key=raw_key,
        label_paths=sample_paths,
        label_key=label_key,
        patch_shape=patch_shape,
        **kwargs
    )
Get the Covid-IF dataset for segmenting nuclei or cells in immunofluorescence microscopy.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- sample_range: Id range of samples to load from the training dataset.
- target: The segmentation task. Either 'cells' or 'nuclei'.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_covid_if_loader(path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, sample_range: Optional[Tuple[int, int]] = None, target: str = 'cells', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_covid_if_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Get the Covid-IF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    covid_if_dataset = get_covid_if_dataset(
        path,
        patch_shape,
        sample_range=sample_range,
        target=target,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(covid_if_dataset, batch_size=batch_size, **dataloader_kwargs)
Get the Covid-IF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- sample_range: Id range of samples to load from the training dataset.
- target: The segmentation task. Either 'cells' or 'nuclei'.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.