torch_em.data.datasets.light_microscopy.yeaz
The YeaZ dataset contains annotations for yeast cells in brightfield (2d) and phase-contrast (2d+t) microscopy images.
NOTE: The data is located at:
- Phase-contrast: https://drive.google.com/file/d/14MUIN26ou0L12UC9UV_AC2S3isj1qBMY
- Brightfield: https://drive.google.com/file/d/1Sot3bau0F0dsBjRxoQzdGOeUy_wMezal
The dataset is located at https://www.epfl.ch/labs/lpbs/data-and-software/. This dataset is from the publication https://doi.org/10.1038/s41467-020-19557-4. Please cite it if you use this dataset for your research.
"""The YeaZ dataset contains annotations for yeast cells in brightfield (2d)
and phase-contrast (2d+t) microscopy images.

NOTE: The data is located at:
- Phase-contrast: https://drive.google.com/file/d/14MUIN26ou0L12UC9UV_AC2S3isj1qBMY
- Brightfield: https://drive.google.com/file/d/1Sot3bau0F0dsBjRxoQzdGOeUy_wMezal

The dataset is located at https://www.epfl.ch/labs/lpbs/data-and-software/.
This dataset is from the publication https://doi.org/10.1038/s41467-020-19557-4.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, Literal, List

import json
from sklearn.model_selection import train_test_split

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = {
    "phc": "https://drive.google.com/file/d/14MUIN26ou0L12UC9UV_AC2S3isj1qBMY",
    "bf": "https://drive.google.com/file/d/1Sot3bau0F0dsBjRxoQzdGOeUy_wMezal"
}


def get_yeaz_data(path: Union[os.PathLike, str], choice: Literal['bf', 'phc'], download: bool = False) -> str:
    """Obtain the YeaZ dataset.

    NOTE: Please download the dataset manually.

    Args:
        path: Filepath to a folder where the data is expected to be downloaded for further processing.
        choice: The choice of modality for dataset, either 'bf' (brightfield) or 'phc' (phase-contrast).
        download: Whether to download the data if it is not present. Not implemented for this data.

    Returns:
        Filepath where the data is expected to be downloaded.

    Raises:
        ValueError: If `choice` is neither 'bf' nor 'phc'.
        NotImplementedError: If the archive is not present, because automatic download is not supported.
    """
    if choice not in ['bf', 'phc']:
        raise ValueError(f"'{choice}' is not a valid choice of dataset.")

    folder_name = "gold-standard-PhC-plus-2" if choice == "phc" else "gold-standard-BF-V-1"
    data_dir = os.path.join(path, folder_name)
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    tar_path = os.path.join(path, f"{folder_name}.tar.gz")

    # Only a missing archive is an error. When the archive was already downloaded manually,
    # the `download` flag is irrelevant and the archive is simply extracted.
    if not os.path.exists(tar_path):
        raise NotImplementedError(
            f"Automatic download is not supported. Please download the data manually from '{URL[choice]}'."
        )

    util.unzip_tarfile(tar_path=tar_path, dst=path, remove=False)

    return data_dir


def _create_data_splits(path, data_dir, choice, split, raw_paths):
    """Return the image and label paths of one split, creating the split file on first use.

    The split assignment is stored in `yeaz_{choice}_splits.json` inside `path` and is read back
    on later calls. `train_test_split` is called without a fixed random state, so the stored file
    is what keeps the splits identical between calls.

    Args:
        path: Folder in which the split file is stored.
        data_dir: Folder that contains the extracted images and masks.
        choice: The choice of modality; only used for the name of the split file.
        split: The key of the split to return ('train', 'val' or 'test').
        raw_paths: Filepaths of all images; only used when the split file has to be created.

    Returns:
        List of filepaths for the image data of the split.
        List of filepaths for the label data of the split.
    """
    json_file = os.path.join(path, f"yeaz_{choice}_splits.json")
    if os.path.exists(json_file):
        with open(json_file, "r") as f:
            data = json.load(f)
    else:
        # Get the filenames
        names = [os.path.basename(p) for p in raw_paths]

        # Create train / val / test splits: 20% of all images for testing,
        # then 15% of the remaining images for validation.
        train_split, test_split = train_test_split(names, test_size=0.2)
        train_split, val_split = train_test_split(train_split, test_size=0.15)
        data = {"train": train_split, "val": val_split, "test": test_split}

        # Write the filenames with splits to a json file.
        with open(json_file, "w") as f:
            json.dump(data, f, indent=4)

    _raw_paths = [os.path.join(data_dir, name) for name in data[split]]
    # Each mask is stored next to its image and differs only in the filename suffix.
    _label_paths = [p.replace("_im.tif", "_mask.tif") for p in _raw_paths]

    return _raw_paths, _label_paths


def get_yeaz_paths(
    path: Union[os.PathLike, str],
    choice: Literal['bf', 'phc'],
    split: Literal['train', 'val', 'test'],
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get the YeaZ data.

    Args:
        path: Filepath to a folder where the data is expected to be downloaded for further processing.
        choice: The choice of modality for dataset.
        split: The choice of data split.
        download: Whether to download the data if it is not present. Not implemented for this data.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_yeaz_data(path, choice, download)

    raw_paths = natsorted(glob(os.path.join(data_dir, "*_im.tif")))

    # Get the raw and label paths.
    raw_paths, label_paths = _create_data_splits(path, data_dir, choice, split, raw_paths)

    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0

    return raw_paths, label_paths


def get_yeaz_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    choice: Literal['bf', 'phc'],
    split: Literal['train', 'val', 'test'],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the YeaZ dataset for yeast cell segmentation.

    Args:
        path: Filepath to a folder where the data is expected to be downloaded for further processing.
        patch_shape: The patch shape to use for training.
        choice: The choice of modality for dataset.
        split: The choice of data split.
        download: Whether to download the data if it is not present. Not implemented for this data.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_yeaz_paths(path, choice, split, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )


def get_yeaz_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    choice: Literal['bf', 'phc'],
    split: Literal['train', 'val', 'test'],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the YeaZ dataloader for yeast cell segmentation.

    Args:
        path: Filepath to a folder where the data is expected to be downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        choice: The choice of modality for dataset.
        split: The choice of data split.
        download: Whether to download the data if it is not present. Not implemented for this data.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_yeaz_dataset(path, patch_shape, choice, split, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
{'phc': 'https://drive.google.com/file/d/14MUIN26ou0L12UC9UV_AC2S3isj1qBMY', 'bf': 'https://drive.google.com/file/d/1Sot3bau0F0dsBjRxoQzdGOeUy_wMezal'}
def
get_yeaz_data( path: Union[os.PathLike, str], choice: Literal['bf, phc'], download: bool = False) -> str:
def get_yeaz_data(path: Union[os.PathLike, str], choice: Literal['bf', 'phc'], download: bool = False) -> str:
    """Obtain the YeaZ dataset.

    NOTE: Please download the dataset manually.

    Args:
        path: Filepath to a folder where the data is expected to be downloaded for further processing.
        choice: The choice of modality for dataset, either 'bf' (brightfield) or 'phc' (phase-contrast).
        download: Whether to download the data if it is not present. Not implemented for this data.

    Returns:
        Filepath where the data is expected to be downloaded.

    Raises:
        ValueError: If `choice` is neither 'bf' nor 'phc'.
        NotImplementedError: If the archive is not present, because automatic download is not supported.
    """
    if choice not in ['bf', 'phc']:
        raise ValueError(f"'{choice}' is not a valid choice of dataset.")

    folder_name = "gold-standard-PhC-plus-2" if choice == "phc" else "gold-standard-BF-V-1"
    data_dir = os.path.join(path, folder_name)
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    tar_path = os.path.join(path, f"{folder_name}.tar.gz")

    # Only a missing archive is an error. When the archive was already downloaded manually,
    # the `download` flag is irrelevant and the archive is simply extracted.
    if not os.path.exists(tar_path):
        raise NotImplementedError(
            f"Automatic download is not supported. Please download the data manually from '{URL[choice]}'."
        )

    util.unzip_tarfile(tar_path=tar_path, dst=path, remove=False)

    return data_dir
Obtain the YeaZ dataset.
NOTE: Please download the dataset manually.
Arguments:
- path: Filepath to a folder where the data is expected to be downloaded for further processing.
- choice: The choice of modality for dataset, either 'bf' or 'phc'.
- download: Whether to download the data if it is not present. Not implemented for this data.
Returns:
Filepath where the data is expected to be downloaded.
def
get_yeaz_paths( path: Union[os.PathLike, str], choice: Literal['bf, phc'], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]:
def get_yeaz_paths(
    path: Union[os.PathLike, str],
    choice: Literal['bf', 'phc'],
    split: Literal['train', 'val', 'test'],
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get the YeaZ data.

    Args:
        path: Filepath to a folder where the data is expected to be downloaded for further processing.
        choice: The choice of modality for dataset, either 'bf' or 'phc'.
        split: The choice of data split.
        download: Whether to download the data if it is not present. Not implemented for this data.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_yeaz_data(path, choice, download)

    # Images are identified by their '_im.tif' suffix and sorted in natural order.
    raw_paths = natsorted(glob(os.path.join(data_dir, "*_im.tif")))

    # Get the raw and label paths.
    raw_paths, label_paths = _create_data_splits(path, data_dir, choice, split, raw_paths)

    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0

    return raw_paths, label_paths
Get the YeaZ data.
Arguments:
- path: Filepath to a folder where the data is expected to be downloaded for further processing.
- choice: The choice of modality for dataset.
- split: The choice of data split.
- download: Whether to download the data if it is not present. Not implemented for this data.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_yeaz_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], choice: Literal['bf, phc'], split: Literal['train', 'val', 'test'], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_yeaz_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    choice: Literal['bf', 'phc'],
    split: Literal['train', 'val', 'test'],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the YeaZ dataset for yeast cell segmentation.

    Args:
        path: Filepath to a folder where the data is expected to be downloaded for further processing.
        patch_shape: The patch shape to use for training.
        choice: The choice of modality for dataset, either 'bf' or 'phc'.
        split: The choice of data split.
        download: Whether to download the data if it is not present. Not implemented for this data.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_yeaz_paths(path, choice, split, download)

    # The keys are None because images and labels are passed as individual tif files.
    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )
Get the YeaZ dataset for yeast cell segmentation.
Arguments:
- path: Filepath to a folder where the data is expected to be downloaded for further processing.
- patch_shape: The patch shape to use for training.
- choice: The choice of modality for dataset.
- split: The choice of data split.
- download: Whether to download the data if it is not present. Not implemented for this data.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_yeaz_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], choice: Literal['bf, phc'], split: Literal['train', 'val', 'test'], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_yeaz_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    choice: Literal['bf', 'phc'],
    split: Literal['train', 'val', 'test'],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the YeaZ dataloader for yeast cell segmentation.

    Args:
        path: Filepath to a folder where the data is expected to be downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        choice: The choice of modality for dataset, either 'bf' or 'phc'.
        split: The choice of data split.
        download: Whether to download the data if it is not present. Not implemented for this data.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_yeaz_dataset(path, patch_shape, choice, split, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
Get the YeaZ dataloader for yeast cell segmentation.
Arguments:
- path: Filepath to a folder where the data is expected to be downloaded for further processing.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- choice: The choice of modality for dataset.
- split: The choice of data split.
- download: Whether to download the data if it is not present. Not implemented for this data.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.