torch_em.data.datasets.light_microscopy.covid_if
This dataset contains annotations for cell and nucleus segmentation in immunofluorescence microscopy.
This dataset is from the publication https://doi.org/10.1002/bies.202000257. Please cite it if you use this dataset in your research.
"""This dataset contains annotations for cell and nucleus segmentation
in immunofluorescence microscopy.

This dataset is from the publication https://doi.org/10.1002/bies.202000257.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from typing import List, Optional, Tuple, Union

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


COVID_IF_URL = "https://zenodo.org/record/5092850/files/covid-if-groundtruth.zip?download=1"
CHECKSUM = "d9cd6c85a19b802c771fb4ff928894b19a8fab0e0af269c49235fdac3f7a60e1"


def get_covid_if_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the Covid-IF training data.

    The data is treated as present once `path` contains at least one `.h5` file.
    A folder that merely exists (created by the caller, or left behind by an earlier
    failed download) does not skip the download.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.
    """
    # Check for the extracted files rather than for the folder: `os.makedirs` below creates
    # the folder before the download runs, so a failed attempt leaves an empty folder behind.
    if glob(os.path.join(path, "*.h5")):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "covid-if.zip")
    util.download_source(zip_path, COVID_IF_URL, download, CHECKSUM)
    # NOTE(review): the third argument presumably removes the archive after extraction;
    # confirm against `util.unzip`.
    util.unzip(zip_path, path, True)

    return path


def get_covid_if_paths(
    path: Union[os.PathLike, str],
    sample_range: Optional[Tuple[Optional[int], Optional[int]]] = None,
    download: bool = False,
) -> List[str]:
    """Get paths to the Covid-IF data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        sample_range: Id range `(start, stop)` of samples to load from the training dataset.
            `stop` is exclusive. A `None` start means 0, a `None` stop means the number
            of `.h5` files found in `path`.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the stored data.

    Raises:
        ValueError: If the file for any id in `sample_range` does not exist.
    """
    get_covid_if_data(path, download)

    file_paths = sorted(glob(os.path.join(path, "*.h5")))
    if sample_range is None:
        return file_paths

    start, stop = sample_range
    start = 0 if start is None else start
    stop = len(file_paths) if stop is None else stop

    file_paths = [os.path.join(path, f"gt_image_{idx:03}.h5") for idx in range(start, stop)]
    # Explicit raise instead of `assert`, so the check also runs under `python -O`.
    if not all(os.path.exists(fp) for fp in file_paths):
        raise ValueError(f"Invalid sample range {sample_range}")

    return file_paths


def get_covid_if_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> Dataset:
    """Get the Covid-IF dataset for segmenting nuclei or cells in immunofluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        ValueError: If `target` is neither 'cells' nor 'nuclei'.
    """
    # TODO also support infected_cells
    available_targets = ("cells", "nuclei")

    if target == "cells":
        raw_key = "raw/serum_IgG/s0"
        label_key = "labels/cells/s0"
    elif target == "nuclei":
        raw_key = "raw/nuclei/s0"
        label_key = "labels/nuclei/s0"
    else:
        raise ValueError(f"{target} not found in {available_targets}")

    # Raw data and labels are read from the same files, under different keys.
    file_paths = get_covid_if_paths(path, sample_range, download)

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
    )
    kwargs = util.update_kwargs(kwargs, "ndim", 2)

    return torch_em.default_segmentation_dataset(
        raw_paths=file_paths,
        raw_key=raw_key,
        label_paths=file_paths,
        label_key=label_key,
        patch_shape=patch_shape,
        **kwargs
    )


def get_covid_if_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Get the Covid-IF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_covid_if_dataset(
        path, patch_shape, sample_range=sample_range, target=target, download=download,
        offsets=offsets, boundaries=boundaries, binary=binary, **ds_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **loader_kwargs)
COVID_IF_URL = 'https://zenodo.org/record/5092850/files/covid-if-groundtruth.zip?download=1'
CHECKSUM = 'd9cd6c85a19b802c771fb4ff928894b19a8fab0e0af269c49235fdac3f7a60e1'
def get_covid_if_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_covid_if_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the Covid-IF training data.

    The data is treated as present once `path` contains at least one `.h5` file.
    A folder that merely exists (created by the caller, or left behind by an earlier
    failed download) does not skip the download.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.
    """
    # Check for the extracted files rather than for the folder: `os.makedirs` below creates
    # the folder before the download runs, so a failed attempt leaves an empty folder behind.
    if glob(os.path.join(path, "*.h5")):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "covid-if.zip")
    util.download_source(zip_path, COVID_IF_URL, download, CHECKSUM)
    # NOTE(review): the third argument presumably removes the archive after extraction;
    # confirm against `util.unzip`.
    util.unzip(zip_path, path, True)

    return path
Download the Covid-IF training data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the training data.
def get_covid_if_paths(path: Union[os.PathLike, str], sample_range: Optional[Tuple[int, int]] = None, download: bool = False) -> List[str]:
def get_covid_if_paths(
    path: Union[os.PathLike, str],
    sample_range: Optional[Tuple[Optional[int], Optional[int]]] = None,
    download: bool = False,
) -> List[str]:
    """Get paths to the Covid-IF data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        sample_range: Id range `(start, stop)` of samples to load from the training dataset.
            `stop` is exclusive. A `None` start means 0, a `None` stop means the number
            of `.h5` files found in `path`.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the stored data.

    Raises:
        ValueError: If the file for any id in `sample_range` does not exist.
    """
    get_covid_if_data(path, download)

    file_paths = sorted(glob(os.path.join(path, "*.h5")))
    if sample_range is None:
        return file_paths

    start, stop = sample_range
    start = 0 if start is None else start
    stop = len(file_paths) if stop is None else stop

    file_paths = [os.path.join(path, f"gt_image_{idx:03}.h5") for idx in range(start, stop)]
    # Explicit raise instead of `assert`, so the check also runs under `python -O`.
    if not all(os.path.exists(fp) for fp in file_paths):
        raise ValueError(f"Invalid sample range {sample_range}")

    return file_paths
Get paths to the Covid-IF data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- sample_range: Id range of samples to load from the training dataset.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths to the stored data.
def get_covid_if_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, int], sample_range: Optional[Tuple[int, int]] = None, target: str = 'cells', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_covid_if_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> Dataset:
    """Build the Covid-IF segmentation dataset for either cells or nuclei.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        ValueError: If `target` is neither 'cells' nor 'nuclei'.
    """
    # TODO also support infected_cells
    available_targets = ("cells", "nuclei")

    # One entry per supported target: (target name, (raw key, label key)).
    key_table = (
        ("cells", ("raw/serum_IgG/s0", "labels/cells/s0")),
        ("nuclei", ("raw/nuclei/s0", "labels/nuclei/s0")),
    )
    selected_keys = next((keys for name, keys in key_table if target == name), None)
    if selected_keys is None:
        raise ValueError(f"{target} not found in {available_targets}")
    raw_key, label_key = selected_keys

    # Raw data and labels are read from the same files, under different keys.
    volume_paths = get_covid_if_paths(path, sample_range, download)

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
    )
    kwargs = util.update_kwargs(kwargs, "ndim", 2)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=volume_paths,
        raw_key=raw_key,
        label_paths=volume_paths,
        label_key=label_key,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
Get the Covid-IF dataset for segmenting nuclei or cells in immunofluorescence microscopy.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- sample_range: Id range of samples to load from the training dataset.
- target: The segmentation task. Either 'cells' or 'nuclei'.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_covid_if_loader(path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, sample_range: Optional[Tuple[int, int]] = None, target: str = 'cells', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_covid_if_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Build the Covid-IF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Split the extra keyword arguments between the dataset factory and the data loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )

    covid_if_dataset = get_covid_if_dataset(
        path=path,
        patch_shape=patch_shape,
        sample_range=sample_range,
        target=target,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )

    loader = torch_em.get_data_loader(covid_if_dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader
Get the Covid-IF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- sample_range: Id range of samples to load from the training dataset.
- target: The segmentation task. Either 'cells' or 'nuclei'.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.