torch_em.data.datasets.light_microscopy.covid_if

This dataset contains annotations for cell and nucleus segmentation in immunofluorescence microscopy.

This dataset is from the publication https://doi.org/10.1002/bies.202000257. Please cite it if you use this dataset in your research.

  1"""This dataset contains annotation for cell and nucleus segmentation
  2in immunofluorescence microscopy.
  3
  4This dataset is from the publication https://doi.org/10.1002/bies.202000257.
  5Please cite it if you use this dataset in your research.
  6"""
  7
  8import os
  9from glob import glob
 10from typing import List, Optional, Tuple, Union
 11
 12from torch.utils.data import Dataset, DataLoader
 13
 14import torch_em
 15
 16from .. import util
 17
 18
 19COVID_IF_URL = "https://zenodo.org/record/5092850/files/covid-if-groundtruth.zip?download=1"  # Zenodo download link for the Covid-IF ground-truth zip archive.
 20CHECKSUM = "d9cd6c85a19b802c771fb4ff928894b19a8fab0e0af269c49235fdac3f7a60e1"  # Checksum handed to util.download_source for the archive (64 hex digits, presumably SHA-256 - confirm).
 21
 22
 23def get_covid_if_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 24    """Download the Covid-IF training data.
 25
 26    Args:
 27        path: Filepath to a folder where the downloaded data will be saved.
 28        download: Whether to download the data if it is not present.
 29
 30    Returns:
 31        The filepath to the training data.
 32    """
 33    url = COVID_IF_URL
 34    checksum = CHECKSUM
 35
 36    if os.path.exists(path):
 37        return path
 38
 39    os.makedirs(path, exist_ok=True)
 40    zip_path = os.path.join(path, "covid-if.zip")
 41    util.download_source(zip_path, url, download, checksum)
 42    util.unzip(zip_path, path, True)
 43
 44    return path
 45
 46
 47def get_covid_if_paths(
 48    path: Union[os.PathLike, str],
 49    sample_range: Optional[Tuple[int, int]] = None,
 50    download: bool = False
 51) -> List[str]:
 52    """Get paths to the Covid-IF data.
 53
 54    Args:
 55        path: Filepath to a folder where the downloaded data will be saved.
 56        sample_range: Id range of samples to load from the training dataset.
 57        download: Whether to download the data if it is not present.
 58
 59    Returns:
 60        List of filepaths to the stored data.
 61    """
 62    get_covid_if_data(path, download)
 63
 64    file_paths = sorted(glob(os.path.join(path, "*.h5")))
 65    if sample_range is not None:
 66        start, stop = sample_range
 67        if start is None:
 68            start = 0
 69        if stop is None:
 70            stop = len(file_paths)
 71        file_paths = [os.path.join(path, f"gt_image_{idx:03}.h5") for idx in range(start, stop)]
 72        assert all(os.path.exists(fp) for fp in file_paths), f"Invalid sample range {sample_range}"
 73
 74    return file_paths
 75
 76
 77def get_covid_if_dataset(
 78    path: Union[os.PathLike, str],
 79    patch_shape: Tuple[int, int],
 80    sample_range: Optional[Tuple[int, int]] = None,
 81    target: str = "cells",
 82    download: bool = False,
 83    offsets: Optional[List[List[int]]] = None,
 84    boundaries: bool = False,
 85    binary: bool = False,
 86    **kwargs
 87) -> Dataset:
 88    """Get the Covid-IF dataset for segmenting nuclei or cells in immunofluorescence microscopy.
 89
 90    Args:
 91        path: Filepath to a folder where the downloaded data will be saved.
 92        patch_shape: The patch shape to use for training.
 93        sample_range: Id range of samples to load from the training dataset.
 94        target: The segmentation task. Either 'cells' or 'nuclei'.
 95        download: Whether to download the data if it is not present.
 96        offsets: Offset values for affinity computation used as target.
 97        boundaries: Whether to compute boundaries as the target.
 98        binary: Whether to use a binary segmentation target.
 99        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
100
101    Returns:
102       The segmentation dataset.
103    """
104    available_targets = ("cells", "nuclei")
105    # TODO also support infected_cells
106    # available_targets = ("cells", "nuclei", "infected_cells")
107
108    if target == "cells":
109        raw_key = "raw/serum_IgG/s0"
110        label_key = "labels/cells/s0"
111    elif target == "nuclei":
112        raw_key = "raw/nuclei/s0"
113        label_key = "labels/nuclei/s0"
114    else:
115        raise ValueError(f"{target} not found in {available_targets}")
116
117    file_paths = get_covid_if_paths(path, sample_range, download)
118
119    kwargs, _ = util.add_instance_label_transform(
120        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
121    )
122    kwargs = util.update_kwargs(kwargs, "ndim", 2)
123
124    return torch_em.default_segmentation_dataset(
125        raw_paths=file_paths,
126        raw_key=raw_key,
127        label_paths=file_paths,
128        label_key=label_key,
129        patch_shape=patch_shape,
130        **kwargs
131    )
132
133
def get_covid_if_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Build the Covid-IF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.

    The extra keyword arguments are split with `util.split_kwargs`: one part is
    forwarded to `get_covid_if_dataset`, the rest to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    covid_if_dataset = get_covid_if_dataset(
        path,
        patch_shape,
        sample_range=sample_range,
        target=target,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )

    data_loader = torch_em.get_data_loader(covid_if_dataset, batch_size=batch_size, **dataloader_kwargs)
    return data_loader
COVID_IF_URL = 'https://zenodo.org/record/5092850/files/covid-if-groundtruth.zip?download=1'  # Zenodo download link for the Covid-IF ground-truth zip archive.
CHECKSUM = 'd9cd6c85a19b802c771fb4ff928894b19a8fab0e0af269c49235fdac3f7a60e1'  # Checksum handed to util.download_source for the archive (64 hex digits, presumably SHA-256 - confirm).
def get_covid_if_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_covid_if_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the Covid-IF training data is available in a local folder.

    If `path` already exists it is returned unchanged and nothing is downloaded.
    Otherwise the folder is created, the ground-truth archive is fetched into it
    via `util.download_source` and extracted in place via `util.unzip`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.
    """
    # An existing folder is taken as the sign that the data was fetched before.
    if not os.path.exists(path):
        os.makedirs(path, exist_ok=True)
        archive_path = os.path.join(path, "covid-if.zip")
        util.download_source(archive_path, COVID_IF_URL, download, CHECKSUM)
        # NOTE(review): the third argument presumably removes the archive after extraction - confirm in util.unzip.
        util.unzip(archive_path, path, True)

    return path

Download the Covid-IF training data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the training data.

def get_covid_if_paths( path: Union[os.PathLike, str], sample_range: Optional[Tuple[int, int]] = None, download: bool = False) -> List[str]:
def get_covid_if_paths(
    path: Union[os.PathLike, str],
    sample_range: Optional[Tuple[Optional[int], Optional[int]]] = None,
    download: bool = False
) -> List[str]:
    """Get paths to the Covid-IF data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        sample_range: Half-open id range `(start, stop)` of samples to load from the training dataset.
            A `start` of None is replaced by 0 and a `stop` of None is replaced by the number
            of h5 files found in `path`. If `sample_range` itself is None, all h5 files
            found in `path` are returned.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the stored data.

    Raises:
        ValueError: If `sample_range` selects a sample id for which no file exists.
    """
    get_covid_if_data(path, download)

    file_paths = sorted(glob(os.path.join(path, "*.h5")))
    if sample_range is None:
        return file_paths

    start, stop = sample_range
    start = 0 if start is None else start
    stop = len(file_paths) if stop is None else stop

    # Sample files are addressed by their zero-padded id rather than by position in the glob result.
    file_paths = [os.path.join(path, f"gt_image_{idx:03}.h5") for idx in range(start, stop)]

    # Raise explicitly instead of using `assert`: assertions are stripped under `python -O`,
    # which would let an invalid range return paths to files that do not exist.
    if not all(os.path.exists(fp) for fp in file_paths):
        raise ValueError(f"Invalid sample range {sample_range}")

    return file_paths

Get paths to the Covid-IF data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • sample_range: Id range of samples to load from the training dataset.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths to the stored data.

def get_covid_if_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], sample_range: Optional[Tuple[int, int]] = None, target: str = 'cells', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 78def get_covid_if_dataset(
 79    path: Union[os.PathLike, str],
 80    patch_shape: Tuple[int, int],
 81    sample_range: Optional[Tuple[int, int]] = None,
 82    target: str = "cells",
 83    download: bool = False,
 84    offsets: Optional[List[List[int]]] = None,
 85    boundaries: bool = False,
 86    binary: bool = False,
 87    **kwargs
 88) -> Dataset:
 89    """Get the Covid-IF dataset for segmenting nuclei or cells in immunofluorescence microscopy.
 90
 91    Args:
 92        path: Filepath to a folder where the downloaded data will be saved.
 93        patch_shape: The patch shape to use for training.
 94        sample_range: Id range of samples to load from the training dataset.
 95        target: The segmentation task. Either 'cells' or 'nuclei'.
 96        download: Whether to download the data if it is not present.
 97        offsets: Offset values for affinity computation used as target.
 98        boundaries: Whether to compute boundaries as the target.
 99        binary: Whether to use a binary segmentation target.
100        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
101
102    Returns:
103       The segmentation dataset.
104    """
105    available_targets = ("cells", "nuclei")
106    # TODO also support infected_cells
107    # available_targets = ("cells", "nuclei", "infected_cells")
108
109    if target == "cells":
110        raw_key = "raw/serum_IgG/s0"
111        label_key = "labels/cells/s0"
112    elif target == "nuclei":
113        raw_key = "raw/nuclei/s0"
114        label_key = "labels/nuclei/s0"
115    else:
116        raise ValueError(f"{target} not found in {available_targets}")
117
118    file_paths = get_covid_if_paths(path, sample_range, download)
119
120    kwargs, _ = util.add_instance_label_transform(
121        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
122    )
123    kwargs = util.update_kwargs(kwargs, "ndim", 2)
124
125    return torch_em.default_segmentation_dataset(
126        raw_paths=file_paths,
127        raw_key=raw_key,
128        label_paths=file_paths,
129        label_key=label_key,
130        patch_shape=patch_shape,
131        **kwargs
132    )

Get the Covid-IF dataset for segmenting nuclei or cells in immunofluorescence microscopy.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • sample_range: Id range of samples to load from the training dataset.
  • target: The segmentation task. Either 'cells' or 'nuclei'.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_covid_if_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, sample_range: Optional[Tuple[int, int]] = None, target: str = 'cells', download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_covid_if_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    sample_range: Optional[Tuple[int, int]] = None,
    target: str = "cells",
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Build the Covid-IF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.

    The extra keyword arguments are split with `util.split_kwargs`: one part is
    forwarded to `get_covid_if_dataset`, the rest to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_range: Id range of samples to load from the training dataset.
        target: The segmentation task. Either 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    covid_if_dataset = get_covid_if_dataset(
        path,
        patch_shape,
        sample_range=sample_range,
        target=target,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )

    data_loader = torch_em.get_data_loader(covid_if_dataset, batch_size=batch_size, **dataloader_kwargs)
    return data_loader

Get the Covid-IF dataloader for segmenting nuclei or cells in immunofluorescence microscopy.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • sample_range: Id range of samples to load from the training dataset.
  • target: The segmentation task. Either 'cells' or 'nuclei'.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.