torch_em.data.datasets.light_microscopy.vicar

This dataset contains annotations for cell segmentation for label-free live cell quantitative phase microscopy images.

NOTE: This dataset also provides large unlabeled data for pretraining / self-supervised methods.

The dataset is located at https://zenodo.org/records/5153251. This dataset is from the publication https://doi.org/10.1364/BOE.433212. Please cite it if you use this dataset in your research.

View Source

  1"""This dataset contains annotations for cell segmentation for
  2label-free live cell quantitative phase microscopy images.
  3
  4NOTE: This dataset also provides large unlabeled data for pretraining / self-supervised methods.
  5
  6The dataset is located at https://zenodo.org/records/5153251.
  7This dataset is from the publication https://doi.org/10.1364/BOE.433212.
  8Please cite it if you use this dataset in your research.
  9"""
 10
 11import os
 12from glob import glob
 13from natsort import natsorted
 14from typing import Union, Tuple, List, Optional, Sequence
 15
 16import torch_em
 17
 18from torch.utils.data import Dataset, DataLoader
 19
 20from .. import util
 21
 22
 23URL = {
 24    "labelled": "https://zenodo.org/record/5153251/files/labelled.zip",
 25    "unlabelled": "https://zenodo.org/record/5153251/files/unlabelled.zip"
 26}
 27
 28CHECKSUMS = {
 29    "labelled": "e4b6fc8ad3955c4e0fe0e95a9be03d4333b6d9029f675ae9652084cefc4aaab6",
 30    "unlabelled": "c0228c56140d16141a5f9fb303080861624d6d2d25fab5bd463e489dab9adf4b"
 31}
 32
 33VALID_CELL_TYPES = ["A2058", "G361", "HOB", "PC3", "PNT1A"]
 34
 35
 36def get_vicar_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 37    """Download the VICAR dataset.
 38
 39    Args:
 40        path: Filepath to a folder where the downloaded data will be saved.
 41        download: Whether to download the data if it is not present.
 42
 43    Returns:
 44        The filepath to the training data.
 45    """
 46    # NOTE: We hard-code everything to the 'labeled' data split.
 47    data_dir = os.path.join(path, "labelled")
 48    if os.path.exists(data_dir):
 49        return data_dir
 50
 51    os.makedirs(data_dir, exist_ok=True)
 52
 53    zip_path = os.path.join(path, "labelled.zip")
 54    util.download_source(path=zip_path, url=URL["labelled"], download=download, checksum=CHECKSUMS["labelled"])
 55    util.unzip(zip_path=zip_path, dst=data_dir)
 56
 57    return data_dir
 58
 59
 60def get_vicar_paths(
 61    path: Union[os.PathLike, str],
 62    cell_types: Optional[Union[Sequence[str], str]] = None,
 63    download: bool = False
 64) -> Tuple[List[str], List[str]]:
 65    """Get paths to the VICAR data.
 66
 67    Args:
 68        path: Filepath to a folder where the downloaded data will be saved.
 69        cell_types: The choice of cell types. By default, selects all cell types.
 70        download: Whether to download the data if it is not present.
 71
 72    Returns:
 73        List of filepaths for the image data.
 74        List of filepaths for the label data.
 75    """
 76    data_dir = get_vicar_data(path, download)
 77
 78    if cell_types is not None and isinstance(cell_types, str):
 79        raise ValueError("The choice of cell types should be a sequence of string values.")
 80
 81    if cell_types is None:
 82        cell_types = VALID_CELL_TYPES
 83    else:
 84        if isinstance(cell_types, str):
 85            cell_types = [cell_types]
 86
 87    raw_paths, label_paths = [], []
 88    for cell_type in cell_types:
 89        assert cell_type in VALID_CELL_TYPES
 90
 91        raw_paths.extend(natsorted(glob(os.path.join(data_dir, cell_type, "*_img.tif"))))
 92        label_paths.extend(natsorted(glob(os.path.join(data_dir, cell_type, "*_mask.png"))))
 93
 94    return raw_paths, label_paths
 95
 96
 97def get_vicar_dataset(
 98    path: Union[os.PathLike, str],
 99    patch_shape: Tuple[int, int],
100    cell_types: Optional[Union[Sequence[str], str]] = None,
101    download: bool = False,
102    **kwargs
103) -> Dataset:
104    """Get the VICAR dataset for cell segmentation in quantitative phase microscopy.
105
106    Args:
107        path: Filepath to a folder where the downloaded data will be saved.
108        patch_shape: The patch shape to use for training.
109        cell_types: The choice of cell types. By default, selects all cell types.
110        download: Whether to download the data if it is not present.
111        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
112
113    Returns:
114        The segmentation dataset.
115    """
116    raw_paths, label_paths = get_vicar_paths(path, cell_types, download)
117
118    return torch_em.default_segmentation_dataset(
119        raw_paths=raw_paths,
120        raw_key=None,
121        label_paths=label_paths,
122        label_key=None,
123        patch_shape=patch_shape,
124        is_seg_dataset=False,
125        **kwargs
126    )
127
128
def get_vicar_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    cell_types: Optional[Union[Sequence[str], str]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the VICAR dataloader for cell segmentation in quantitative phase microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        cell_types: The choice of cell types. By default, selects all cell types.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the extra keyword arguments into those for the dataset and those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )

    vicar_dataset = get_vicar_dataset(path, patch_shape, cell_types, download, **dataset_kwargs)

    data_loader = torch_em.get_data_loader(vicar_dataset, batch_size, **dataloader_kwargs)
    return data_loader

URL = {'labelled': 'https://zenodo.org/record/5153251/files/labelled.zip', 'unlabelled': 'https://zenodo.org/record/5153251/files/unlabelled.zip'}

CHECKSUMS = {'labelled': 'e4b6fc8ad3955c4e0fe0e95a9be03d4333b6d9029f675ae9652084cefc4aaab6', 'unlabelled': 'c0228c56140d16141a5f9fb303080861624d6d2d25fab5bd463e489dab9adf4b'}

VALID_CELL_TYPES = ['A2058', 'G361', 'HOB', 'PC3', 'PNT1A']

def get_vicar_data(path: Union[os.PathLike, str], download: bool = False) -> str: View Source

def get_vicar_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the VICAR dataset.

    The labelled archive is downloaded to `path` and extracted into `<path>/labelled`.
    If that folder already exists, it is returned directly and nothing is downloaded.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.
    """
    # NOTE: We hard-code everything to the 'labelled' data split.
    data_dir = os.path.join(path, "labelled")
    if os.path.exists(data_dir):
        return data_dir

    # Only create the parent folder before downloading. Creating `data_dir` up front would leave
    # an empty folder behind when the download fails, and the existence check above would then
    # treat that empty folder as valid data on every later call.
    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "labelled.zip")
    util.download_source(path=zip_path, url=URL["labelled"], download=download, checksum=CHECKSUMS["labelled"])

    # The archive is available now, so it is safe to create the extraction target.
    os.makedirs(data_dir, exist_ok=True)
    util.unzip(zip_path=zip_path, dst=data_dir)

    return data_dir

Download the VICAR dataset.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
download: Whether to download the data if it is not present.

Returns:

The filepath to the training data.

def get_vicar_paths( path: Union[os.PathLike, str], cell_types: Union[Sequence[str], str, NoneType] = None, download: bool = False) -> Tuple[List[str], List[str]]: View Source

def get_vicar_paths(
    path: Union[os.PathLike, str],
    cell_types: Optional[Union[Sequence[str], str]] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the VICAR data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        cell_types: The choice of cell types, given as a single name or a sequence of names
            from `VALID_CELL_TYPES`. By default, selects all cell types.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If a requested cell type is not one of `VALID_CELL_TYPES`.
    """
    # Normalize the selection to a list: a single name is wrapped, matching the type annotation.
    if cell_types is None:
        cell_types = VALID_CELL_TYPES
    elif isinstance(cell_types, str):
        cell_types = [cell_types]
    else:
        cell_types = list(cell_types)

    # Validate with an explicit exception (asserts are stripped under `python -O`) and do so
    # before fetching the data, so that an invalid request does not trigger a download.
    invalid_cell_types = [cell_type for cell_type in cell_types if cell_type not in VALID_CELL_TYPES]
    if invalid_cell_types:
        raise ValueError(
            f"Invalid cell type(s): {invalid_cell_types}. Choose from {VALID_CELL_TYPES}."
        )

    data_dir = get_vicar_data(path, download)

    raw_paths, label_paths = [], []
    for cell_type in cell_types:
        # Natural sorting keeps the image and mask lists of one cell type in the same order.
        raw_paths.extend(natsorted(glob(os.path.join(data_dir, cell_type, "*_img.tif"))))
        label_paths.extend(natsorted(glob(os.path.join(data_dir, cell_type, "*_mask.png"))))

    return raw_paths, label_paths

Get paths to the VICAR data.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
cell_types: The choice of cell types. By default, selects all cell types.
download: Whether to download the data if it is not present.

Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_vicar_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], cell_types: Union[Sequence[str], str, NoneType] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

 98def get_vicar_dataset(
 99    path: Union[os.PathLike, str],
100    patch_shape: Tuple[int, int],
101    cell_types: Optional[Union[Sequence[str], str]] = None,
102    download: bool = False,
103    **kwargs
104) -> Dataset:
105    """Get the VICAR dataset for cell segmentation in quantitative phase microscopy.
106
107    Args:
108        path: Filepath to a folder where the downloaded data will be saved.
109        patch_shape: The patch shape to use for training.
110        cell_types: The choice of cell types. By default, selects all cell types.
111        download: Whether to download the data if it is not present.
112        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
113
114    Returns:
115        The segmentation dataset.
116    """
117    raw_paths, label_paths = get_vicar_paths(path, cell_types, download)
118
119    return torch_em.default_segmentation_dataset(
120        raw_paths=raw_paths,
121        raw_key=None,
122        label_paths=label_paths,
123        label_key=None,
124        patch_shape=patch_shape,
125        is_seg_dataset=False,
126        **kwargs
127    )

Get the VICAR dataset for cell segmentation in quantitative phase microscopy.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
patch_shape: The patch shape to use for training.
cell_types: The choice of cell types. By default, selects all cell types.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_vicar_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], cell_types: Union[Sequence[str], str, NoneType] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_vicar_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    cell_types: Optional[Union[Sequence[str], str]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the VICAR dataloader for cell segmentation in quantitative phase microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        cell_types: The choice of cell types. By default, selects all cell types.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the extra keyword arguments into those for the dataset and those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )

    vicar_dataset = get_vicar_dataset(path, patch_shape, cell_types, download, **dataset_kwargs)

    data_loader = torch_em.get_data_loader(vicar_dataset, batch_size, **dataloader_kwargs)
    return data_loader

Get the VICAR dataloader for cell segmentation in quantitative phase microscopy.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
batch_size: The batch size for training.
patch_shape: The patch shape to use for training.
cell_types: The choice of cell types. By default, selects all cell types.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.