torch_em.data.datasets.light_microscopy.omnipose
The OmniPose dataset contains phase-contrast and fluorescence microscopy images and annotations for bacteria segmentation, and brightfield microscopy images and annotations for worm segmentation.
This dataset is described in the publication https://doi.org/10.1038/s41592-022-01639-4. Please cite it if you use this dataset in your research.
"""The OmniPose dataset contains phase-contrast and fluorescence microscopy images
and annotations for bacteria segmentation and brightfield microscopy images and
annotations for worm segmentation.

This dataset is described in the publication https://doi.org/10.1038/s41592-022-01639-4.
Please cite it if you use this dataset in your research.
"""


import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, Literal, Optional, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://files.osf.io/v1/resources/xmury/providers/osfstorage/62f56c035775130690f25481/?zip="

# NOTE: the checksums are not reliable from the osf project downloads.
# CHECKSUM = "7ae943ff5003b085a4cde7337bd9c69988b034cfe1a6d3f252b5268f1f4c0af7"
CHECKSUM = None

DATA_CHOICES = ["bact_fluor", "bact_phase", "worm", "worm_high_res"]


def get_omnipose_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the OmniPose dataset.

    The archive is stored as `<path>/datasets.zip` and unzipped with `<path>/data`
    as destination. If `<path>/data` already exists, nothing is downloaded or unzipped.

    Args:
        path: Filepath to the folder where the downloaded data will be saved.
            The folder is created if it does not exist.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath where the data is downloaded.
    """
    os.makedirs(path, exist_ok=True)

    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        return data_dir

    zip_path = os.path.join(path, "datasets.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=data_dir)

    return data_dir


def get_omnipose_paths(
    path: Union[os.PathLike, str],
    split: Literal["train", "test"],
    data_choice: Optional[Union[str, List[str]]] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the OmniPose data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train' or 'test'.
        data_choice: The choice of specific data.
            Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
            A single name or a list / tuple of names. By default all of them are used.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` or any entry of `data_choice` is not a valid option.
    """
    # Validate every argument before touching the file system, so that a typo
    # is reported immediately instead of after the dataset has been downloaded.
    if split not in ("train", "test"):
        raise ValueError(f"'{split}' is not a valid split.")

    if data_choice is None:
        data_choice = DATA_CHOICES
    elif not isinstance(data_choice, (list, tuple)):
        data_choice = [data_choice]

    for _chosen_data in data_choice:
        if _chosen_data not in DATA_CHOICES:
            raise ValueError(f"'{_chosen_data}' is not a valid choice of data.")

    data_dir = get_omnipose_data(path, download)

    # The image glob patterns below also match these files, which are not raw images.
    non_image_suffixes = ("_masks.tif", "_masks.png", "_flows.tif")

    all_image_paths, all_gt_paths = [], []
    for _chosen_data in data_choice:
        if _chosen_data.startswith("bact"):
            # The bacteria subsets are globbed one directory level below '<split>_sorted'.
            base_dir = os.path.join(data_dir, _chosen_data, f"{split}_sorted", "*")
            gt_pattern, image_pattern = "*_masks.tif", "*.tif"
        else:
            base_dir = os.path.join(data_dir, _chosen_data, split)
            gt_pattern, image_pattern = "*_masks.*", "*"

        gt_paths = glob(os.path.join(base_dir, gt_pattern))
        image_paths = [
            _path for _path in glob(os.path.join(base_dir, image_pattern))
            if not _path.endswith(non_image_suffixes)
        ]

        # Images and labels are sorted independently with the same natural ordering.
        all_image_paths.extend(natsorted(image_paths))
        all_gt_paths.extend(natsorted(gt_paths))

    return all_image_paths, all_gt_paths


def get_omnipose_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "test"],
    data_choice: Optional[Union[str, List[str]]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the OmniPose dataset for segmenting bacteria and worms in microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train' or 'test'.
        data_choice: The choice of specific data.
            Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_omnipose_paths(path, split, data_choice, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        **kwargs
    )


def get_omnipose_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    split: Literal["train", "test"],
    data_choice: Optional[Union[str, List[str]]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the OmniPose dataloader for segmenting bacteria and worms in microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        split: The data split to use. Either 'train' or 'test'.
        data_choice: The choice of specific data.
            Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_omnipose_dataset(path, patch_shape, split, data_choice, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
URL =
'https://files.osf.io/v1/resources/xmury/providers/osfstorage/62f56c035775130690f25481/?zip='
CHECKSUM =
None
DATA_CHOICES =
['bact_fluor', 'bact_phase', 'worm', 'worm_high_res']
def
get_omnipose_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_omnipose_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the OmniPose data is available locally and return its location.

    The archive is stored as `<path>/datasets.zip` and unzipped with `<path>/data`
    as destination. If `<path>/data` already exists, nothing is downloaded or unzipped.

    Args:
        path: Filepath to the folder where the downloaded data will be saved.
            The folder is created if it does not exist.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath where the data is downloaded.
    """
    os.makedirs(path, exist_ok=True)
    extracted_dir = os.path.join(path, "data")

    # Only fetch and unpack the archive when no extracted copy is present yet.
    if not os.path.exists(extracted_dir):
        archive_path = os.path.join(path, "datasets.zip")
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive_path, dst=extracted_dir)

    return extracted_dir
Download the OmniPose dataset.
Arguments:
- path: Filepath to the folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath where the data is downloaded.
def
get_omnipose_paths( path: Union[os.PathLike, str], split: Literal['train', 'test'], data_choice: Union[List[str], str, NoneType] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_omnipose_paths(
    path: Union[os.PathLike, str],
    split: Literal["train", "test"],
    data_choice: Optional[Union[str, List[str]]] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the OmniPose data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train' or 'test'.
        data_choice: The choice of specific data.
            Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
            A single name or a list / tuple of names. By default all of them are used.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` or any entry of `data_choice` is not a valid option.
    """
    # Validate every argument before touching the file system, so that a typo
    # is reported immediately instead of after the dataset has been downloaded.
    if split not in ("train", "test"):
        raise ValueError(f"'{split}' is not a valid split.")

    if data_choice is None:
        data_choice = DATA_CHOICES
    elif not isinstance(data_choice, (list, tuple)):
        data_choice = [data_choice]

    for _chosen_data in data_choice:
        if _chosen_data not in DATA_CHOICES:
            raise ValueError(f"'{_chosen_data}' is not a valid choice of data.")

    data_dir = get_omnipose_data(path, download)

    # The image glob patterns below also match these files, which are not raw images.
    non_image_suffixes = ("_masks.tif", "_masks.png", "_flows.tif")

    all_image_paths, all_gt_paths = [], []
    for _chosen_data in data_choice:
        if _chosen_data.startswith("bact"):
            # The bacteria subsets are globbed one directory level below '<split>_sorted'.
            base_dir = os.path.join(data_dir, _chosen_data, f"{split}_sorted", "*")
            gt_pattern, image_pattern = "*_masks.tif", "*.tif"
        else:
            base_dir = os.path.join(data_dir, _chosen_data, split)
            gt_pattern, image_pattern = "*_masks.*", "*"

        gt_paths = glob(os.path.join(base_dir, gt_pattern))
        image_paths = [
            _path for _path in glob(os.path.join(base_dir, image_pattern))
            if not _path.endswith(non_image_suffixes)
        ]

        # Images and labels are sorted independently with the same natural ordering.
        all_image_paths.extend(natsorted(image_paths))
        all_gt_paths.extend(natsorted(gt_paths))

    return all_image_paths, all_gt_paths
Get paths to the OmniPose data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split to use. Either 'train' or 'test'.
- data_choice: The choice of specific data. Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
- download: Whether to download the data if it is not present.
Returns:
A list of filepaths for the image data and a list of filepaths for the label data.
def
get_omnipose_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'test'], data_choice: Union[List[str], str, NoneType] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_omnipose_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "test"],
    data_choice: Optional[Union[str, List[str]]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the OmniPose segmentation dataset for bacteria and worms in microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train' or 'test'.
        data_choice: The choice of specific data.
            Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_omnipose_paths(
        path=path, split=split, data_choice=data_choice, download=download
    )

    # Images and labels are passed as lists of files, with no internal key (`raw_key` / `label_key` are None).
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
Get the OmniPose dataset for segmenting bacteria and worms in microscopy images.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train' or 'test'.
- data_choice: The choice of specific data. Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_omnipose_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, split: Literal['train', 'test'], data_choice: Union[List[str], str, NoneType] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_omnipose_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    split: Literal["train", "test"],
    data_choice: Optional[Union[str, List[str]]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the OmniPose dataloader for segmenting bacteria and worms in microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        split: The data split to use. Either 'train' or 'test'.
        data_choice: The choice of specific data.
            Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Divide the extra keyword arguments between the dataset factory and the data loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    omnipose_dataset = get_omnipose_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        data_choice=data_choice,
        download=download,
        **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset=omnipose_dataset, batch_size=batch_size, **dataloader_kwargs)
Get the OmniPose dataloader for segmenting bacteria and worms in microscopy images.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- split: The data split to use. Either 'train' or 'test'.
- data_choice: The choice of specific data. Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.