torch_em.data.datasets.light_microscopy.vicar
This dataset contains annotations for cell segmentation for label-free live cell quantitative phase microscopy images.
NOTE: This dataset also provides large unlabeled data for pretraining / self-supervised methods.
The dataset is located at https://zenodo.org/records/5153251. This dataset is from the publication https://doi.org/10.1364/BOE.433212. Please cite it if you use this dataset in your research.
"""This dataset contains annotations for cell segmentation for
label-free live cell quantitative phase microscopy images.

NOTE: This dataset also provides large unlabeled data for pretraining / self-supervised methods.

The dataset is located at https://zenodo.org/records/5153251.
This dataset is from the publication https://doi.org/10.1364/BOE.433212.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, List, Optional, Sequence

import torch_em

from torch.utils.data import Dataset, DataLoader

from .. import util


URL = {
    "labelled": "https://zenodo.org/record/5153251/files/labelled.zip",
    "unlabelled": "https://zenodo.org/record/5153251/files/unlabelled.zip"
}

CHECKSUMS = {
    "labelled": "e4b6fc8ad3955c4e0fe0e95a9be03d4333b6d9029f675ae9652084cefc4aaab6",
    "unlabelled": "c0228c56140d16141a5f9fb303080861624d6d2d25fab5bd463e489dab9adf4b"
}

VALID_CELL_TYPES = ["A2058", "G361", "HOB", "PC3", "PNT1A"]


def get_vicar_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the VICAR dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.
    """
    # NOTE: We hard-code everything to the 'labeled' data split.
    data_dir = os.path.join(path, "labelled")
    if os.path.exists(data_dir):
        return data_dir

    # Only the parent folder is created up front. The data folder doubles as the
    # "already downloaded" marker checked above, so it must not exist before the
    # archive has actually been fetched; otherwise a failed download would make
    # every later call return an empty folder.
    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "labelled.zip")
    util.download_source(path=zip_path, url=URL["labelled"], download=download, checksum=CHECKSUMS["labelled"])

    os.makedirs(data_dir, exist_ok=True)
    util.unzip(zip_path=zip_path, dst=data_dir)

    return data_dir


def get_vicar_paths(
    path: Union[os.PathLike, str],
    cell_types: Optional[Union[Sequence[str], str]] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the VICAR data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        cell_types: The choice of cell types, either a single name or a sequence of names.
            By default, selects all cell types.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If any requested cell type is not in `VALID_CELL_TYPES`.
    """
    if cell_types is None:
        cell_types = VALID_CELL_TYPES
    elif isinstance(cell_types, str):
        # A bare string would otherwise be iterated character by character.
        cell_types = [cell_types]
    else:
        cell_types = list(cell_types)

    # Validate before touching the filesystem so that a bad argument never triggers a download.
    invalid = [cell_type for cell_type in cell_types if cell_type not in VALID_CELL_TYPES]
    if invalid:
        raise ValueError(f"Invalid cell type(s): {invalid}. Choose from {VALID_CELL_TYPES}.")

    data_dir = get_vicar_data(path, download)

    raw_paths, label_paths = [], []
    for cell_type in cell_types:
        raw_paths.extend(natsorted(glob(os.path.join(data_dir, cell_type, "*_img.tif"))))
        label_paths.extend(natsorted(glob(os.path.join(data_dir, cell_type, "*_mask.png"))))

    return raw_paths, label_paths


def get_vicar_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    cell_types: Optional[Union[Sequence[str], str]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the VICAR dataset for cell segmentation in quantitative phase microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        cell_types: The choice of cell types. By default, selects all cell types.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_vicar_paths(path, cell_types, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )


def get_vicar_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    cell_types: Optional[Union[Sequence[str], str]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the VICAR dataloader for cell segmentation in quantitative phase microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        cell_types: The choice of cell types. By default, selects all cell types.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_vicar_dataset(path, patch_shape, cell_types, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def get_vicar_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the VICAR dataset.

    The presence of the `labelled` sub-folder inside `path` is used as the
    marker that the data is already available; in that case nothing is
    downloaded and the folder is returned immediately.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.
    """
    # NOTE: We hard-code everything to the 'labeled' data split.
    data_dir = os.path.join(path, "labelled")
    if os.path.exists(data_dir):
        return data_dir

    # Only the parent folder is created up front. The data folder doubles as the
    # "already downloaded" marker checked above, so it must not exist before the
    # archive has actually been fetched; otherwise a failed download would make
    # every later call return an empty folder.
    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "labelled.zip")
    util.download_source(path=zip_path, url=URL["labelled"], download=download, checksum=CHECKSUMS["labelled"])

    os.makedirs(data_dir, exist_ok=True)
    util.unzip(zip_path=zip_path, dst=data_dir)

    return data_dir
Download the VICAR dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the training data.
def get_vicar_paths(
    path: Union[os.PathLike, str],
    cell_types: Optional[Union[Sequence[str], str]] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the VICAR data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        cell_types: The choice of cell types, either a single name or a sequence of names.
            By default, selects all cell types.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If any requested cell type is not in `VALID_CELL_TYPES`.
    """
    if cell_types is None:
        cell_types = VALID_CELL_TYPES
    elif isinstance(cell_types, str):
        # A bare string would otherwise be iterated character by character.
        cell_types = [cell_types]
    else:
        cell_types = list(cell_types)

    # Validate before touching the filesystem so that a bad argument never triggers a download.
    # An explicit exception is used instead of `assert`, which is removed under `python -O`.
    invalid = [cell_type for cell_type in cell_types if cell_type not in VALID_CELL_TYPES]
    if invalid:
        raise ValueError(f"Invalid cell type(s): {invalid}. Choose from {VALID_CELL_TYPES}.")

    data_dir = get_vicar_data(path, download)

    raw_paths, label_paths = [], []
    for cell_type in cell_types:
        raw_paths.extend(natsorted(glob(os.path.join(data_dir, cell_type, "*_img.tif"))))
        label_paths.extend(natsorted(glob(os.path.join(data_dir, cell_type, "*_mask.png"))))

    return raw_paths, label_paths
Get paths to the VICAR data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- cell_types: The choice of cell types. By default, selects all cell types.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_vicar_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    cell_types: Optional[Union[Sequence[str], str]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the VICAR segmentation dataset for quantitative phase microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        cell_types: The choice of cell types. By default, selects all cell types.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # Resolve the image / mask file lists for the requested cell types.
    image_files, mask_files = get_vicar_paths(path=path, cell_types=cell_types, download=download)

    # Raw and label keys are passed as None and the data is not treated as a
    # "seg dataset" (is_seg_dataset=False).
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_files,
        raw_key=None,
        label_paths=mask_files,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset
Get the VICAR dataset for cell segmentation in quantitative phase microscopy.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- cell_types: The choice of cell types. By default, selects all cell types.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_vicar_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    cell_types: Optional[Union[Sequence[str], str]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the VICAR dataloader for cell segmentation in quantitative phase microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        cell_types: The choice of cell types. By default, selects all cell types.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining
    # ones, which are forwarded to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    vicar_dataset = get_vicar_dataset(
        path=path,
        patch_shape=patch_shape,
        cell_types=cell_types,
        download=download,
        **dataset_kwargs
    )

    loader = torch_em.get_data_loader(vicar_dataset, batch_size, **dataloader_kwargs)
    return loader
Get the VICAR dataloader for cell segmentation in quantitative phase microscopy.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- cell_types: The choice of cell types. By default, selects all cell types.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.