torch_em.data.datasets.light_microscopy.omnipose

The OmniPose dataset contains phase-contrast and fluorescence microscopy images and annotations for bacteria segmentation and brightfield microscopy images and annotations for worm segmentation.

This dataset is described in the publication https://doi.org/10.1038/s41592-022-01639-4. Please cite it if you use this dataset in your research.

  1"""The OmniPose dataset contains phase-contrast and fluorescence microscopy images
  2and annotations for bacteria segmentation and brightfield microscopy images and
  3annotations for worm segmentation.
  4
  5This dataset is described in the publication https://doi.org/10.1038/s41592-022-01639-4.
  6Please cite it if you use this dataset in your research.
  7"""
  8
  9
 10import os
 11from glob import glob
 12from natsort import natsorted
 13from typing import Union, Tuple, Literal, Optional, List
 14
 15from torch.utils.data import Dataset, DataLoader
 16
 17import torch_em
 18
 19from .. import util
 20
 21
 22URL = "https://files.osf.io/v1/resources/xmury/providers/osfstorage/62f56c035775130690f25481/?zip="
 23
 24# NOTE: the checksums are not reliable from the osf project downloads.
 25# CHECKSUM = "7ae943ff5003b085a4cde7337bd9c69988b034cfe1a6d3f252b5268f1f4c0af7"
 26CHECKSUM = None
 27
 28DATA_CHOICES = ["bact_fluor", "bact_phase", "worm", "worm_high_res"]
 29
 30
 31def get_omnipose_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 32    """Download the OmniPose dataset.
 33
 34    Args:
 35        path: Filepath to the folder where the downloaded data will be saved.
 36        download: Whether to download the data if it is not present.
 37
 38    Return:
 39        The filepath where the data is downloaded.
 40    """
 41    os.makedirs(path, exist_ok=True)
 42
 43    data_dir = os.path.join(path, "data")
 44    if os.path.exists(data_dir):
 45        return data_dir
 46
 47    zip_path = os.path.join(path, "datasets.zip")
 48    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 49    util.unzip(zip_path=zip_path, dst=data_dir)
 50
 51    return data_dir
 52
 53
 54def get_omnipose_paths(
 55    path: Union[os.PathLike, str],
 56    split: Literal["train", "test"],
 57    data_choice: Optional[Union[str, List[str]]] = None,
 58    download: bool = False
 59) -> Tuple[List[str], List[str]]:
 60    """Get paths to the OmniPose data.
 61
 62    Args:
 63        path: Filepath to a folder where the downloaded data will be saved.
 64        split: The data split to use. Either 'train' or 'test'.
 65        data_choice: The choice of specific data.
 66            Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
 67        download: Whether to download the data if it is not present.
 68
 69    Returns:
 70        List of filepaths for the image data.
 71        List of filepaths for the label data.
 72    """
 73    data_dir = get_omnipose_data(path, download)
 74
 75    if split not in ["train", "test"]:
 76        raise ValueError(f"'{split}' is not a valid split.")
 77
 78    if data_choice is None:
 79        data_choice = DATA_CHOICES
 80    else:
 81        if not isinstance(data_choice, list):
 82            data_choice = [data_choice]
 83
 84    all_image_paths, all_gt_paths = [], []
 85    for _chosen_data in data_choice:
 86        if _chosen_data not in DATA_CHOICES:
 87            raise ValueError(f"'{_chosen_data}' is not a valid choice of data.")
 88
 89        if _chosen_data.startswith("bact"):
 90            base_dir = os.path.join(data_dir, _chosen_data, f"{split}_sorted", "*")
 91            gt_paths = glob(os.path.join(base_dir, "*_masks.tif"))
 92            image_paths = glob(os.path.join(base_dir, "*.tif"))
 93
 94        else:
 95            base_dir = os.path.join(data_dir, _chosen_data, split)
 96            gt_paths = glob(os.path.join(base_dir, "*_masks.*"))
 97            image_paths = glob(os.path.join(base_dir, "*"))
 98
 99        for _path in image_paths.copy():
100            # NOTE: Removing the masks and flows from the image paths.
101            if _path.endswith("_masks.tif") or _path.endswith("_masks.png") or _path.endswith("_flows.tif"):
102                image_paths.remove(_path)
103
104        all_image_paths.extend(natsorted(image_paths))
105        all_gt_paths.extend(natsorted(gt_paths))
106
107    return all_image_paths, all_gt_paths
108
109
def get_omnipose_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "test"],
    data_choice: Optional[Union[str, List[str]]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the OmniPose dataset for segmenting bacteria and worms in microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train' or 'test'.
        data_choice: The choice of specific data.
            Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_omnipose_paths(path, split, data_choice, download)

    # Images and labels are handed over as two lists of file paths, without internal keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        **kwargs
    )
143
144
def get_omnipose_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    split: Literal["train", "test"],
    data_choice: Optional[Union[str, List[str]]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the OmniPose dataloader for segmenting bacteria and worms in microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        split: The data split to use. Either 'train' or 'test'.
        data_choice: The choice of specific data.
            Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the ones for the data loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_omnipose_dataset(path, patch_shape, split, data_choice, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
# Download link for the zipped OmniPose data hosted on OSF.
URL = 'https://files.osf.io/v1/resources/xmury/providers/osfstorage/62f56c035775130690f25481/?zip='
# No checksum is passed to the download, since the checksums of OSF project downloads are not reliable.
CHECKSUM = None
# Names of the sub-datasets that can be selected via the `data_choice` argument.
DATA_CHOICES = ['bact_fluor', 'bact_phase', 'worm', 'worm_high_res']
def get_omnipose_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_omnipose_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the OmniPose dataset.

    The archive is stored as `<path>/datasets.zip` and extracted into `<path>/data`.
    If `<path>/data` already exists, nothing is downloaded and that folder is returned directly.

    Args:
        path: Filepath to the folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath where the data is downloaded.
    """
    os.makedirs(path, exist_ok=True)

    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        # An existing folder is treated as a finished extraction; its content is not re-checked.
        return data_dir

    zip_path = os.path.join(path, "datasets.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=data_dir)

    return data_dir

Download the OmniPose dataset.

Arguments:
  • path: Filepath to the folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath where the data is downloaded.

def get_omnipose_paths( path: Union[os.PathLike, str], split: Literal['train', 'test'], data_choice: Union[List[str], str, NoneType] = None, download: bool = False) -> Tuple[List[str], List[str]]:
 55def get_omnipose_paths(
 56    path: Union[os.PathLike, str],
 57    split: Literal["train", "test"],
 58    data_choice: Optional[Union[str, List[str]]] = None,
 59    download: bool = False
 60) -> Tuple[List[str], List[str]]:
 61    """Get paths to the OmniPose data.
 62
 63    Args:
 64        path: Filepath to a folder where the downloaded data will be saved.
 65        split: The data split to use. Either 'train' or 'test'.
 66        data_choice: The choice of specific data.
 67            Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
 68        download: Whether to download the data if it is not present.
 69
 70    Returns:
 71        List of filepaths for the image data.
 72        List of filepaths for the label data.
 73    """
 74    data_dir = get_omnipose_data(path, download)
 75
 76    if split not in ["train", "test"]:
 77        raise ValueError(f"'{split}' is not a valid split.")
 78
 79    if data_choice is None:
 80        data_choice = DATA_CHOICES
 81    else:
 82        if not isinstance(data_choice, list):
 83            data_choice = [data_choice]
 84
 85    all_image_paths, all_gt_paths = [], []
 86    for _chosen_data in data_choice:
 87        if _chosen_data not in DATA_CHOICES:
 88            raise ValueError(f"'{_chosen_data}' is not a valid choice of data.")
 89
 90        if _chosen_data.startswith("bact"):
 91            base_dir = os.path.join(data_dir, _chosen_data, f"{split}_sorted", "*")
 92            gt_paths = glob(os.path.join(base_dir, "*_masks.tif"))
 93            image_paths = glob(os.path.join(base_dir, "*.tif"))
 94
 95        else:
 96            base_dir = os.path.join(data_dir, _chosen_data, split)
 97            gt_paths = glob(os.path.join(base_dir, "*_masks.*"))
 98            image_paths = glob(os.path.join(base_dir, "*"))
 99
100        for _path in image_paths.copy():
101            # NOTE: Removing the masks and flows from the image paths.
102            if _path.endswith("_masks.tif") or _path.endswith("_masks.png") or _path.endswith("_flows.tif"):
103                image_paths.remove(_path)
104
105        all_image_paths.extend(natsorted(image_paths))
106        all_gt_paths.extend(natsorted(gt_paths))
107
108    return all_image_paths, all_gt_paths

Get paths to the OmniPose data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The data split to use. Either 'train' or 'test'.
  • data_choice: The choice of specific data. Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_omnipose_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'test'], data_choice: Union[List[str], str, NoneType] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_omnipose_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "test"],
    data_choice: Optional[Union[str, List[str]]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the OmniPose dataset for segmenting bacteria and worms in microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train' or 'test'.
        data_choice: The choice of specific data.
            Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, gt_paths = get_omnipose_paths(path, split, data_choice, download)

    # Images and labels are handed over as two lists of file paths, without internal keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        **kwargs
    )

Get the OmniPose dataset for segmenting bacteria and worms in microscopy images.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train' or 'test'.
  • data_choice: The choice of specific data. Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_omnipose_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, split: Literal['train', 'test'], data_choice: Union[List[str], str, NoneType] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_omnipose_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    batch_size: int,
    split: Literal["train", "test"],
    data_choice: Optional[Union[str, List[str]]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the OmniPose dataloader for segmenting bacteria and worms in microscopy images.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        split: The data split to use. Either 'train' or 'test'.
        data_choice: The choice of specific data.
            Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the ones for the data loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_omnipose_dataset(path, patch_shape, split, data_choice, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)

Get the OmniPose dataloader for segmenting bacteria and worms in microscopy images.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • split: The data split to use. Either 'train' or 'test'.
  • data_choice: The choice of specific data. Either 'bact_fluor', 'bact_phase', 'worm' or 'worm_high_res'.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.