torch_em.data.datasets.light_microscopy.covid_if
This dataset contains annotations for cell and nucleus segmentation in immunofluorescence microscopy.
This dataset is from the publication https://doi.org/10.1002/bies.202000257. Please cite it if you use this dataset in your research.
"""This dataset contains annotations for cell and nucleus segmentation
in immunofluorescence microscopy.

This dataset is from the publication https://doi.org/10.1002/bies.202000257.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from typing import List, Optional, Tuple, Union

import torch_em
from torch.utils.data import Dataset, DataLoader
from .. import util

COVID_IF_URL = "https://zenodo.org/record/5092850/files/covid-if-groundtruth.zip?download=1"
CHECKSUM = "d9cd6c85a19b802c771fb4ff928894b19a8fab0e0af269c49235fdac3f7a60e1"


def get_covid_if_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the CovidIF training data.

    The data counts as present when `path` already contains at least one `.h5` file.
    A folder that merely exists (for example one created up front, or left behind by
    an interrupted download) does not count, so the download is attempted for it.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.
    """
    # Look for the data files themselves instead of only the folder:
    # `get_covid_if_dataset` reads `*.h5` files directly from `path`.
    if glob(os.path.join(path, "*.h5")):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "covid-if.zip")
    util.download_source(zip_path, COVID_IF_URL, download, CHECKSUM)
    # NOTE(review): the positional `True` presumably removes the archive after extraction;
    # confirm against `util.unzip`.
    util.unzip(zip_path, path, True)

    return path


def get_covid_if_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> Dataset:
    """Get the CovidIF dataset for segmenting nuclei or cells in immunofluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        sample_range: Id range of samples to load from the training dataset.
            Either entry may be None: the start then defaults to 0 and the stop
            to the number of `.h5` files found in `path`.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # TODO also support infected_cells
    # available_targets = ("cells", "nuclei", "infected_cells")
    available_targets = ("cells", "nuclei")
    assert target in available_targets, f"{target} not found in {available_targets}"

    # Keys inside each file for the raw data and the labels of the chosen task.
    if target == "nuclei":
        raw_key, label_key = "raw/nuclei/s0", "labels/nuclei/s0"
    elif target == "cells":
        raw_key, label_key = "raw/serum_IgG/s0", "labels/cells/s0"

    get_covid_if_data(path, download)

    h5_files = glob(os.path.join(path, "*.h5"))
    h5_files.sort()
    if sample_range is not None:
        first, last = sample_range
        first = 0 if first is None else first
        last = len(h5_files) if last is None else last
        # With an explicit range the file names are built from the sample ids
        # and must all exist on disk.
        h5_files = [os.path.join(path, f"gt_image_{idx:03}.h5") for idx in range(first, last)]
        assert not any(not os.path.exists(fp) for fp in h5_files), f"Invalid sample range {sample_range}"

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
    )
    kwargs = util.update_kwargs(kwargs, "ndim", 2)

    # Raw data and labels live in the same files, only under different keys.
    return torch_em.default_segmentation_dataset(
        h5_files, raw_key, h5_files, label_key, patch_shape, **kwargs
    )


def get_covid_if_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CovidIF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Split the extra keyword arguments between the dataset and the loader.
    dataset_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_covid_if_dataset(
        path,
        patch_shape,
        sample_range=sample_range,
        target=target,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **loader_kwargs)
COVID_IF_URL = 'https://zenodo.org/record/5092850/files/covid-if-groundtruth.zip?download=1'
CHECKSUM = 'd9cd6c85a19b802c771fb4ff928894b19a8fab0e0af269c49235fdac3f7a60e1'
def get_covid_if_data(path: Union[os.PathLike, str], download: bool) -> str:
def get_covid_if_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the CovidIF training data.

    The data counts as present when `path` already contains at least one `.h5` file.
    A folder that merely exists (for example one created up front, or left behind by
    an interrupted download) does not count, so the download is attempted for it.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.
    """
    # Look for the data files themselves instead of only the folder:
    # `get_covid_if_dataset` reads `*.h5` files directly from `path`.
    if glob(os.path.join(path, "*.h5")):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "covid-if.zip")
    util.download_source(zip_path, COVID_IF_URL, download, CHECKSUM)
    # NOTE(review): the positional `True` presumably removes the archive after extraction;
    # confirm against `util.unzip`.
    util.unzip(zip_path, path, True)

    return path
Download the CovidIF training data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the training data.
def get_covid_if_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, int], sample_range: Optional[Tuple[int, int]] = None, target: str = 'cells', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_covid_if_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> Dataset:
    """Get the CovidIF dataset for segmenting nuclei or cells in immunofluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        sample_range: Id range of samples to load from the training dataset.
            Either entry may be None: the start then defaults to 0 and the stop
            to the number of `.h5` files found in `path`.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # TODO also support infected_cells
    # available_targets = ("cells", "nuclei", "infected_cells")
    available_targets = ("cells", "nuclei")
    assert target in available_targets, f"{target} not found in {available_targets}"

    # Keys inside each file for the raw data and the labels of the chosen task.
    if target == "nuclei":
        raw_key, label_key = "raw/nuclei/s0", "labels/nuclei/s0"
    elif target == "cells":
        raw_key, label_key = "raw/serum_IgG/s0", "labels/cells/s0"

    get_covid_if_data(path, download)

    h5_files = glob(os.path.join(path, "*.h5"))
    h5_files.sort()
    if sample_range is not None:
        first, last = sample_range
        first = 0 if first is None else first
        last = len(h5_files) if last is None else last
        # With an explicit range the file names are built from the sample ids
        # and must all exist on disk.
        h5_files = [os.path.join(path, f"gt_image_{idx:03}.h5") for idx in range(first, last)]
        assert not any(not os.path.exists(fp) for fp in h5_files), f"Invalid sample range {sample_range}"

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
    )
    kwargs = util.update_kwargs(kwargs, "ndim", 2)

    # Raw data and labels live in the same files, only under different keys.
    return torch_em.default_segmentation_dataset(
        h5_files, raw_key, h5_files, label_key, patch_shape, **kwargs
    )
Get the CovidIF dataset for segmenting nuclei or cells in immunofluorescence microscopy.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- sample_range: Id range of samples to load from the training dataset.
- target: The segmentation task. Either 'cells' or 'nuclei'.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_covid_if_loader(path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, sample_range: Optional[Tuple[int, int]] = None, target: str = 'cells', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_covid_if_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CovidIF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Split the extra keyword arguments between the dataset and the loader.
    dataset_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_covid_if_dataset(
        path,
        patch_shape,
        sample_range=sample_range,
        target=target,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **loader_kwargs)
Get the CovidIF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- sample_range: Id range of samples to load from the training dataset.
- target: The segmentation task. Either 'cells' or 'nuclei'.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.