torch_em.data.datasets.light_microscopy.covid_if

This dataset contains annotations for cell and nucleus segmentation in immunofluorescence microscopy.

This dataset is from the publication https://doi.org/10.1002/bies.202000257. Please cite it if you use this dataset in your research.

  1"""This dataset contains annotation for cell and nucleus segmentation
  2in immunofluorescence microscopy.
  3
  4This dataset is from the publication https://doi.org/10.1002/bies.202000257.
  5Please cite it if you use this dataset in your research.
  6"""
  7
  8import os
  9from glob import glob
 10from typing import List, Optional, Tuple, Union
 11
 12import torch_em
 13from torch.utils.data import Dataset, DataLoader
 14from .. import util
 15
   # Download URL of the zipped CovidIF ground-truth data (a Zenodo record).
 16COVID_IF_URL = "https://zenodo.org/record/5092850/files/covid-if-groundtruth.zip?download=1"
   # Expected checksum of that archive; handed to `util.download_source` together with the URL.
   # NOTE(review): 64 hex digits, presumably SHA-256 - confirm against `util.download_source`.
 17CHECKSUM = "d9cd6c85a19b802c771fb4ff928894b19a8fab0e0af269c49235fdac3f7a60e1"
 18
 19
 20def get_covid_if_data(path: Union[os.PathLike, str], download: bool) -> str:
 21    """Download the CovidIF training data.
 22
 23    Args:
 24        path: Filepath to a folder where the downloaded data will be saved.
 25        download: Whether to download the data if it is not present.
 26
 27    Returns:
 28        The filepath to the training data.
 29    """
 30    url = COVID_IF_URL
 31    checksum = CHECKSUM
 32
 33    if os.path.exists(path):
 34        return path
 35
 36    os.makedirs(path, exist_ok=True)
 37    zip_path = os.path.join(path, "covid-if.zip")
 38    util.download_source(zip_path, url, download, checksum)
 39    util.unzip(zip_path, path, True)
 40
 41    return path
 42
 43
 44def get_covid_if_dataset(
 45    path: Union[os.PathLike, str],
 46    patch_shape: Tuple[int, int],
 47    sample_range: Optional[Tuple[int, int]] = None,
 48    target: str = "cells",
 49    download: bool = False,
 50    offsets: Optional[List[List[int]]] = None,
 51    boundaries: bool = False,
 52    binary: bool = False,
 53    **kwargs
 54) -> Dataset:
 55    """Get the CovidIF dataset for segmenting nuclei or cells in immunofluorescence microscopy.
 56
 57    Args:
 58        path: Filepath to a folder where the downloaded data will be saved.
 59        patch_shape: The patch shape to use for training.
 60        sample_range: Id range of samples to load from the training dataset.
 61        target: The segmentation task. Either 'cells' or 'nuclei'.
 62        download: Whether to download the data if it is not present.
 63        offsets: Offset values for affinity computation used as target.
 64        boundaries: Whether to compute boundaries as the target.
 65        binary: Whether to use a binary segmentation target.
 66        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 67
 68    Returns:
 69       The segmentation dataset.
 70    """
 71    available_targets = ("cells", "nuclei")
 72    # TODO also support infected_cells
 73    # available_targets = ("cells", "nuclei", "infected_cells")
 74    assert target in available_targets, f"{target} not found in {available_targets}"
 75
 76    if target == "cells":
 77        raw_key = "raw/serum_IgG/s0"
 78        label_key = "labels/cells/s0"
 79    elif target == "nuclei":
 80        raw_key = "raw/nuclei/s0"
 81        label_key = "labels/nuclei/s0"
 82
 83    get_covid_if_data(path, download)
 84
 85    file_paths = sorted(glob(os.path.join(path, "*.h5")))
 86    if sample_range is not None:
 87        start, stop = sample_range
 88        if start is None:
 89            start = 0
 90        if stop is None:
 91            stop = len(file_paths)
 92        file_paths = [os.path.join(path, f"gt_image_{idx:03}.h5") for idx in range(start, stop)]
 93        assert all(os.path.exists(fp) for fp in file_paths), f"Invalid sample range {sample_range}"
 94
 95    kwargs, _ = util.add_instance_label_transform(
 96        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
 97    )
 98    kwargs = util.update_kwargs(kwargs, "ndim", 2)
 99
100    return torch_em.default_segmentation_dataset(
101        file_paths, raw_key, file_paths, label_key, patch_shape, **kwargs
102    )
103
104
def get_covid_if_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CovidIF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.

    Builds the dataset via `get_covid_if_dataset` and wraps it with `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Route the extra keyword arguments: one part goes to the dataset, the rest to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    covid_if_dataset = get_covid_if_dataset(
        path,
        patch_shape,
        sample_range=sample_range,
        target=target,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(covid_if_dataset, batch_size=batch_size, **dataloader_kwargs)
# Download URL of the zipped CovidIF ground-truth data (a Zenodo record).
COVID_IF_URL = 'https://zenodo.org/record/5092850/files/covid-if-groundtruth.zip?download=1'
# Expected checksum of that archive; handed to `util.download_source` together with the URL.
# NOTE(review): 64 hex digits, presumably SHA-256 - confirm against `util.download_source`.
CHECKSUM = 'd9cd6c85a19b802c771fb4ff928894b19a8fab0e0af269c49235fdac3f7a60e1'
def get_covid_if_data(path: Union[os.PathLike, str], download: bool) -> str:
def get_covid_if_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the CovidIF training data.

    The data counts as present once `path` contains at least one `.h5` file.
    Checking for the files instead of for the folder itself means that an empty
    folder, or one left behind by an interrupted download, does not suppress
    the download.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.
    """
    # Nothing to do if the extracted samples are already there.
    if glob(os.path.join(path, "*.h5")):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "covid-if.zip")
    util.download_source(zip_path, COVID_IF_URL, download, CHECKSUM)
    # NOTE(review): the trailing `True` presumably tells `util.unzip` to delete
    # the archive after extraction - confirm against `util.unzip`.
    util.unzip(zip_path, path, True)

    return path

Download the CovidIF training data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the training data.

def get_covid_if_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], sample_range: Optional[Tuple[int, int]] = None, target: str = 'cells', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 45def get_covid_if_dataset(
 46    path: Union[os.PathLike, str],
 47    patch_shape: Tuple[int, int],
 48    sample_range: Optional[Tuple[int, int]] = None,
 49    target: str = "cells",
 50    download: bool = False,
 51    offsets: Optional[List[List[int]]] = None,
 52    boundaries: bool = False,
 53    binary: bool = False,
 54    **kwargs
 55) -> Dataset:
 56    """Get the CovidIF dataset for segmenting nuclei or cells in immunofluorescence microscopy.
 57
 58    Args:
 59        path: Filepath to a folder where the downloaded data will be saved.
 60        patch_shape: The patch shape to use for training.
 61        sample_range: Id range of samples to load from the training dataset.
 62        target: The segmentation task. Either 'cells' or 'nuclei'.
 63        download: Whether to download the data if it is not present.
 64        offsets: Offset values for affinity computation used as target.
 65        boundaries: Whether to compute boundaries as the target.
 66        binary: Whether to use a binary segmentation target.
 67        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 68
 69    Returns:
 70       The segmentation dataset.
 71    """
 72    available_targets = ("cells", "nuclei")
 73    # TODO also support infected_cells
 74    # available_targets = ("cells", "nuclei", "infected_cells")
 75    assert target in available_targets, f"{target} not found in {available_targets}"
 76
 77    if target == "cells":
 78        raw_key = "raw/serum_IgG/s0"
 79        label_key = "labels/cells/s0"
 80    elif target == "nuclei":
 81        raw_key = "raw/nuclei/s0"
 82        label_key = "labels/nuclei/s0"
 83
 84    get_covid_if_data(path, download)
 85
 86    file_paths = sorted(glob(os.path.join(path, "*.h5")))
 87    if sample_range is not None:
 88        start, stop = sample_range
 89        if start is None:
 90            start = 0
 91        if stop is None:
 92            stop = len(file_paths)
 93        file_paths = [os.path.join(path, f"gt_image_{idx:03}.h5") for idx in range(start, stop)]
 94        assert all(os.path.exists(fp) for fp in file_paths), f"Invalid sample range {sample_range}"
 95
 96    kwargs, _ = util.add_instance_label_transform(
 97        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
 98    )
 99    kwargs = util.update_kwargs(kwargs, "ndim", 2)
100
101    return torch_em.default_segmentation_dataset(
102        file_paths, raw_key, file_paths, label_key, patch_shape, **kwargs
103    )

Get the CovidIF dataset for segmenting nuclei or cells in immunofluorescence microscopy.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • sample_range: Id range of samples to load from the training dataset.
  • target: The segmentation task. Either 'cells' or 'nuclei'.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_covid_if_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, sample_range: Optional[Tuple[int, int]] = None, target: str = 'cells', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_covid_if_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CovidIF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.

    Builds the dataset via `get_covid_if_dataset` and wraps it with `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Route the extra keyword arguments: one part goes to the dataset, the rest to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    covid_if_dataset = get_covid_if_dataset(
        path,
        patch_shape,
        sample_range=sample_range,
        target=target,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(covid_if_dataset, batch_size=batch_size, **dataloader_kwargs)

Get the CovidIF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • sample_range: Id range of samples to load from the training dataset.
  • target: The segmentation task. Either 'cells' or 'nuclei'.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.