torch_em.data.datasets.light_microscopy.covid_if

This dataset contains annotations for cell and nucleus segmentation in immunofluorescence microscopy.

This dataset is from the publication https://doi.org/10.1002/bies.202000257. Please cite it if you use this dataset in your research.

  1"""This dataset contains annotation for cell and nucleus segmentation
  2in immunofluorescence microscopy.
  3
  4This dataset is from the publication https://doi.org/10.1002/bies.202000257.
  5Please cite it if you use this dataset in your research.
  6"""
  7
  8import os
  9from glob import glob
 10from typing import List, Optional, Tuple, Union
 11
 12from torch.utils.data import Dataset, DataLoader
 13
 14import torch_em
 15
 16from .. import util
 17
 18
# Download location (Zenodo record 5092850) of the zip archive with the Covid-IF ground truth.
COVID_IF_URL = "https://zenodo.org/record/5092850/files/covid-if-groundtruth.zip?download=1"
# Checksum handed to `util.download_source` together with the URL.
# NOTE(review): 64 hex characters, presumably a SHA-256 digest of the archive - confirm.
CHECKSUM = "d9cd6c85a19b802c771fb4ff928894b19a8fab0e0af269c49235fdac3f7a60e1"
 21
 22
 23def get_covid_if_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 24    """Download the Covid-IF training data.
 25
 26    Args:
 27        path: Filepath to a folder where the downloaded data will be saved.
 28        download: Whether to download the data if it is not present.
 29
 30    Returns:
 31        The filepath to the training data.
 32    """
 33    url = COVID_IF_URL
 34    checksum = CHECKSUM
 35
 36    if os.path.exists(path):
 37        return path
 38
 39    os.makedirs(path, exist_ok=True)
 40    zip_path = os.path.join(path, "covid-if.zip")
 41    util.download_source(zip_path, url, download, checksum)
 42    util.unzip(zip_path, path, True)
 43
 44    return path
 45
 46
 47def get_covid_if_paths(
 48    path: Union[os.PathLike, str], sample_range: Optional[Tuple[int, int]] = None, download: bool = False,
 49) -> List[str]:
 50    """Get paths to the Covid-IF data.
 51
 52    Args:
 53        path: Filepath to a folder where the downloaded data will be saved.
 54        sample_range: Id range of samples to load from the training dataset.
 55        download: Whether to download the data if it is not present.
 56
 57    Returns:
 58        List of filepaths to the stored data.
 59    """
 60    get_covid_if_data(path, download)
 61
 62    file_paths = sorted(glob(os.path.join(path, "*.h5")))
 63    if sample_range is not None:
 64        start, stop = sample_range
 65        if start is None:
 66            start = 0
 67        if stop is None:
 68            stop = len(file_paths)
 69        file_paths = [os.path.join(path, f"gt_image_{idx:03}.h5") for idx in range(start, stop)]
 70        assert all(os.path.exists(fp) for fp in file_paths), f"Invalid sample range {sample_range}"
 71
 72    return file_paths
 73
 74
 75def get_covid_if_dataset(
 76    path: Union[os.PathLike, str],
 77    patch_shape: Tuple[int, int],
 78    sample_range: Optional[Tuple[int, int]] = None,
 79    target: str = "cells",
 80    download: bool = False,
 81    offsets: Optional[List[List[int]]] = None,
 82    boundaries: bool = False,
 83    binary: bool = False,
 84    **kwargs
 85) -> Dataset:
 86    """Get the Covid-IF dataset for segmenting nuclei or cells in immunofluorescence microscopy.
 87
 88    Args:
 89        path: Filepath to a folder where the downloaded data will be saved.
 90        patch_shape: The patch shape to use for training.
 91        sample_range: Id range of samples to load from the training dataset.
 92        target: The segmentation task. Either 'cells' or 'nuclei'.
 93        download: Whether to download the data if it is not present.
 94        offsets: Offset values for affinity computation used as target.
 95        boundaries: Whether to compute boundaries as the target.
 96        binary: Whether to use a binary segmentation target.
 97        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 98
 99    Returns:
100       The segmentation dataset.
101    """
102    available_targets = ("cells", "nuclei")
103    # TODO also support infected_cells
104    # available_targets = ("cells", "nuclei", "infected_cells")
105
106    if target == "cells":
107        raw_key = "raw/serum_IgG/s0"
108        label_key = "labels/cells/s0"
109    elif target == "nuclei":
110        raw_key = "raw/nuclei/s0"
111        label_key = "labels/nuclei/s0"
112    else:
113        raise ValueError(f"{target} not found in {available_targets}")
114
115    file_paths = get_covid_if_paths(path, sample_range, download)
116
117    kwargs, _ = util.add_instance_label_transform(
118        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
119    )
120    kwargs = util.update_kwargs(kwargs, "ndim", 2)
121
122    return torch_em.default_segmentation_dataset(
123        raw_paths=file_paths,
124        raw_key=raw_key,
125        label_paths=file_paths,
126        label_key=label_key,
127        patch_shape=patch_shape,
128        **kwargs
129    )
130
131
def get_covid_if_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Create the Covid-IF dataloader for cell or nucleus segmentation in immunofluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # The first part of the split is forwarded to the dataset, the second part to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    covid_if_dataset = get_covid_if_dataset(
        path,
        patch_shape,
        sample_range=sample_range,
        target=target,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )

    loader = torch_em.get_data_loader(covid_if_dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader
# Download location (Zenodo record 5092850) of the zip archive with the Covid-IF ground truth.
COVID_IF_URL = 'https://zenodo.org/record/5092850/files/covid-if-groundtruth.zip?download=1'
# Checksum handed to `util.download_source` together with the URL.
# NOTE(review): 64 hex characters, presumably a SHA-256 digest of the archive - confirm.
CHECKSUM = 'd9cd6c85a19b802c771fb4ff928894b19a8fab0e0af269c49235fdac3f7a60e1'
def get_covid_if_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_covid_if_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the Covid-IF training data.

    The data counts as present once `path` contains at least one `.h5` file.
    A folder that merely exists (for example one that was created in advance or
    left behind by an interrupted download) does not skip the download.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.
    """
    # Look for the data files themselves rather than only for the folder:
    # the files are later collected with the pattern "*.h5" directly inside `path`.
    if glob(os.path.join(path, "*.h5")):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "covid-if.zip")
    util.download_source(zip_path, COVID_IF_URL, download, CHECKSUM)
    # NOTE(review): the third argument presumably removes the archive after extraction - confirm in `util.unzip`.
    util.unzip(zip_path, path, True)

    return path

Download the Covid-IF training data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the training data.

def get_covid_if_paths( path: Union[os.PathLike, str], sample_range: Optional[Tuple[int, int]] = None, download: bool = False) -> List[str]:
def get_covid_if_paths(
    path: Union[os.PathLike, str],
    sample_range: Optional[Tuple[Optional[int], Optional[int]]] = None,
    download: bool = False,
) -> List[str]:
    """Get paths to the Covid-IF data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        sample_range: Id range `(start, stop)` of samples to load from the training dataset.
            `stop` is exclusive. A `start` of None is replaced by 0 and a `stop` of None
            by the number of `.h5` files found in `path`.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the stored data.

    Raises:
        ValueError: If a file belonging to the requested sample range does not exist.
    """
    get_covid_if_data(path, download)

    if sample_range is None:
        return sorted(glob(os.path.join(path, "*.h5")))

    start, stop = sample_range
    if start is None:
        start = 0
    if stop is None:
        # Only an open-ended range needs to know how many files are available.
        stop = len(glob(os.path.join(path, "*.h5")))

    file_paths = [os.path.join(path, f"gt_image_{idx:03}.h5") for idx in range(start, stop)]

    # Raise explicitly instead of asserting: assert statements are removed when Python runs with -O.
    missing = [fp for fp in file_paths if not os.path.exists(fp)]
    if missing:
        raise ValueError(f"Invalid sample range {sample_range}: {missing[0]} does not exist")

    return file_paths

Get paths to the Covid-IF data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • sample_range: Id range of samples to load from the training dataset.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths to the stored data.

def get_covid_if_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], sample_range: Optional[Tuple[int, int]] = None, target: str = 'cells', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 76def get_covid_if_dataset(
 77    path: Union[os.PathLike, str],
 78    patch_shape: Tuple[int, int],
 79    sample_range: Optional[Tuple[int, int]] = None,
 80    target: str = "cells",
 81    download: bool = False,
 82    offsets: Optional[List[List[int]]] = None,
 83    boundaries: bool = False,
 84    binary: bool = False,
 85    **kwargs
 86) -> Dataset:
 87    """Get the Covid-IF dataset for segmenting nuclei or cells in immunofluorescence microscopy.
 88
 89    Args:
 90        path: Filepath to a folder where the downloaded data will be saved.
 91        patch_shape: The patch shape to use for training.
 92        sample_range: Id range of samples to load from the training dataset.
 93        target: The segmentation task. Either 'cells' or 'nuclei'.
 94        download: Whether to download the data if it is not present.
 95        offsets: Offset values for affinity computation used as target.
 96        boundaries: Whether to compute boundaries as the target.
 97        binary: Whether to use a binary segmentation target.
 98        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 99
100    Returns:
101       The segmentation dataset.
102    """
103    available_targets = ("cells", "nuclei")
104    # TODO also support infected_cells
105    # available_targets = ("cells", "nuclei", "infected_cells")
106
107    if target == "cells":
108        raw_key = "raw/serum_IgG/s0"
109        label_key = "labels/cells/s0"
110    elif target == "nuclei":
111        raw_key = "raw/nuclei/s0"
112        label_key = "labels/nuclei/s0"
113    else:
114        raise ValueError(f"{target} not found in {available_targets}")
115
116    file_paths = get_covid_if_paths(path, sample_range, download)
117
118    kwargs, _ = util.add_instance_label_transform(
119        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
120    )
121    kwargs = util.update_kwargs(kwargs, "ndim", 2)
122
123    return torch_em.default_segmentation_dataset(
124        raw_paths=file_paths,
125        raw_key=raw_key,
126        label_paths=file_paths,
127        label_key=label_key,
128        patch_shape=patch_shape,
129        **kwargs
130    )

Get the Covid-IF dataset for segmenting nuclei or cells in immunofluorescence microscopy.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • sample_range: Id range of samples to load from the training dataset.
  • target: The segmentation task. Either 'cells' or 'nuclei'.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_covid_if_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, sample_range: Optional[Tuple[int, int]] = None, target: str = 'cells', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_covid_if_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Create the Covid-IF dataloader for cell or nucleus segmentation in immunofluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # The first part of the split is forwarded to the dataset, the second part to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    covid_if_dataset = get_covid_if_dataset(
        path,
        patch_shape,
        sample_range=sample_range,
        target=target,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )

    loader = torch_em.get_data_loader(covid_if_dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader

Get the Covid-IF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • sample_range: Id range of samples to load from the training dataset.
  • target: The segmentation task. Either 'cells' or 'nuclei'.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.