torch_em.data.datasets.light_microscopy.embedseg_data
This dataset contains annotations for 3D fluorescence microscopy segmentation that were introduced by the EmbedSeg publication.
This dataset is from the publication https://proceedings.mlr.press/v143/lalit21a.html. Please cite it if you use this dataset in your research.
"""This dataset contains annotations for 3d fluorescence microscopy segmentation
that were introduced by the EmbedSeg publication.

This dataset is from the publication https://proceedings.mlr.press/v143/lalit21a.html.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from typing import Tuple, Union, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


# Download location of the zip archive for each supported dataset name.
URLS = {
    "Mouse-Organoid-Cells-CBG": "https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Mouse-Organoid-Cells-CBG.zip",  # noqa
    "Mouse-Skull-Nuclei-CBG": "https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Mouse-Skull-Nuclei-CBG.zip",
    "Platynereis-ISH-Nuclei-CBG": "https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Platynereis-ISH-Nuclei-CBG.zip",  # noqa
    "Platynereis-Nuclei-CBG": "https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Platynereis-Nuclei-CBG.zip",
}
# Checksum passed to `util.download_source` for each archive; same keys as `URLS`.
CHECKSUMS = {
    "Mouse-Organoid-Cells-CBG": "3695ac340473900ace8c37fd7f3ae0d37217de9f2b86c2341f36b1727825e48b",
    "Mouse-Skull-Nuclei-CBG": "3600ec261a48bf953820e0536cacd0bb8a5141be6e7435a4cb0fffeb0caf594e",
    "Platynereis-ISH-Nuclei-CBG": "bc9284df6f6d691a8e81b47310d95617252cc98ebf7daeab55801b330ba921e0",
    "Platynereis-Nuclei-CBG": "448cb7b46f2fe7d472795e05c8d7dfb40f259d94595ad2cfd256bc2aa4ab3be7",
}


def get_embedseg_data(path: Union[os.PathLike, str], name: str, download: bool) -> str:
    """Download the EmbedSeg training data.

    If the folder `<path>/<name>` already exists it is returned without downloading anything.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: Name of the dataset to download. Must be one of the keys of `URLS`.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.

    Raises:
        ValueError: If `name` is not one of the keys of `URLS`.
    """
    if name not in URLS:
        raise ValueError(f"The dataset name must be in {list(URLS.keys())}. You provided {name}.")

    url = URLS[name]
    checksum = CHECKSUMS[name]

    data_path = os.path.join(path, name)
    if os.path.exists(data_path):
        return data_path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, f"{name}.zip")
    util.download_source(zip_path, url, download, checksum)
    util.unzip(zip_path, path, True)

    return data_path


def get_embedseg_paths(
    path: Union[os.PathLike, str], name: str, split: str, download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the EmbedSeg data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: Name of the dataset to download.
        split: The split to use for the dataset. It is used as the name of a sub-folder of the dataset folder.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If no tif images are found for the split, or if the number of
            images differs from the number of masks.
    """
    data_root = get_embedseg_data(path, name, download)

    image_dir = os.path.join(data_root, split, "images")
    mask_dir = os.path.join(data_root, split, "masks")

    # Both lists are sorted so that the i-th image is paired with the i-th mask.
    # NOTE(review): this presumes matching file names in both folders - confirm against the data.
    raw_paths = sorted(glob(os.path.join(image_dir, "*.tif")))
    label_paths = sorted(glob(os.path.join(mask_dir, "*.tif")))

    # Keep AssertionError as the failure type, but say what went wrong.
    assert len(raw_paths) > 0, f"Could not find any tif images in '{image_dir}'."
    assert len(raw_paths) == len(label_paths), (
        f"Found {len(raw_paths)} images in '{image_dir}' but {len(label_paths)} masks in '{mask_dir}'."
    )

    return raw_paths, label_paths


def get_embedseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    name: str,
    split: str = "train",
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the EmbedSeg dataset for 3d fluorescence microscopy segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training. The data is 3d, so a shape with
            three entries is presumably expected - confirm against `torch_em.default_segmentation_dataset`.
        name: Name of the dataset to download.
        split: The split to use for the dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_embedseg_paths(path, name, split, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )


def get_embedseg_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    batch_size: int,
    name: str,
    split: str = "train",
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the EmbedSeg dataloader for 3d fluorescence microscopy segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training. The data is 3d, so a shape with
            three entries is presumably expected - confirm against `torch_em.default_segmentation_dataset`.
        batch_size: The batch size for training.
        name: Name of the dataset to download.
        split: The split to use for the dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_embedseg_dataset(
        path, name=name, split=split, patch_shape=patch_shape, download=download, **ds_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **loader_kwargs)
URLS =
{'Mouse-Organoid-Cells-CBG': 'https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Mouse-Organoid-Cells-CBG.zip', 'Mouse-Skull-Nuclei-CBG': 'https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Mouse-Skull-Nuclei-CBG.zip', 'Platynereis-ISH-Nuclei-CBG': 'https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Platynereis-ISH-Nuclei-CBG.zip', 'Platynereis-Nuclei-CBG': 'https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Platynereis-Nuclei-CBG.zip'}
CHECKSUMS =
{'Mouse-Organoid-Cells-CBG': '3695ac340473900ace8c37fd7f3ae0d37217de9f2b86c2341f36b1727825e48b', 'Mouse-Skull-Nuclei-CBG': '3600ec261a48bf953820e0536cacd0bb8a5141be6e7435a4cb0fffeb0caf594e', 'Platynereis-ISH-Nuclei-CBG': 'bc9284df6f6d691a8e81b47310d95617252cc98ebf7daeab55801b330ba921e0', 'Platynereis-Nuclei-CBG': '448cb7b46f2fe7d472795e05c8d7dfb40f259d94595ad2cfd256bc2aa4ab3be7'}
def
get_embedseg_data(path: Union[os.PathLike, str], name: str, download: bool) -> str:
def get_embedseg_data(path: Union[os.PathLike, str], name: str, download: bool) -> str:
    """Make one of the EmbedSeg datasets available on disk and return its folder.

    A dataset folder `<path>/<name>` that already exists is reused as-is. Otherwise the
    zip archive is obtained via `util.download_source` and extracted via `util.unzip`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: Name of the dataset to download. Must be one of the keys of `URLS`.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.

    Raises:
        ValueError: If `name` is not one of the keys of `URLS`.
    """
    if name not in URLS:
        raise ValueError(f"The dataset name must be in {list(URLS.keys())}. You provided {name}.")

    source_url, expected_checksum = URLS[name], CHECKSUMS[name]

    dataset_dir = os.path.join(path, name)
    if not os.path.exists(dataset_dir):
        # Nothing extracted yet: fetch the archive into `path` and unpack it there.
        os.makedirs(path, exist_ok=True)
        archive_path = os.path.join(path, f"{name}.zip")
        util.download_source(archive_path, source_url, download, expected_checksum)
        util.unzip(archive_path, path, True)

    return dataset_dir
Download the EmbedSeg training data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- name: Name of the dataset to download.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the training data.
def
get_embedseg_paths( path: Union[os.PathLike, str], name: str, split: str, download: bool = False) -> Tuple[List[str], List[str]]:
def get_embedseg_paths(
    path: Union[os.PathLike, str], name: str, split: str, download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the EmbedSeg data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: Name of the dataset to download.
        split: The split to use for the dataset. It is used as the name of a sub-folder of the dataset folder.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If no tif images are found for the split, or if the number of
            images differs from the number of masks.
    """
    data_root = get_embedseg_data(path, name, download)

    image_dir = os.path.join(data_root, split, "images")
    mask_dir = os.path.join(data_root, split, "masks")

    # Both lists are sorted so that the i-th image is paired with the i-th mask.
    # NOTE(review): this presumes matching file names in both folders - confirm against the data.
    raw_paths = sorted(glob(os.path.join(image_dir, "*.tif")))
    label_paths = sorted(glob(os.path.join(mask_dir, "*.tif")))

    # Keep AssertionError as the failure type, but say what went wrong.
    assert len(raw_paths) > 0, f"Could not find any tif images in '{image_dir}'."
    assert len(raw_paths) == len(label_paths), (
        f"Found {len(raw_paths)} images in '{image_dir}' but {len(label_paths)} masks in '{mask_dir}'."
    )

    return raw_paths, label_paths
Get paths to the EmbedSeg data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- name: Name of the dataset to download.
- split: The split to use for the dataset.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_embedseg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], name: str, split: str = 'train', download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_embedseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    name: str,
    split: str = "train",
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the EmbedSeg dataset for 3d fluorescence microscopy segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training. The data is 3d, so a shape with
            three entries is presumably expected - confirm against `torch_em.default_segmentation_dataset`.
        name: Name of the dataset to download.
        split: The split to use for the dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_embedseg_paths(path, name, split, download)

    # The keys are None because the data is given as tif files rather than as
    # containers with internal datasets.
    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )
Get the EmbedSeg dataset for 3d fluorescence microscopy segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- name: Name of the dataset to download.
- split: The split to use for the dataset.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_embedseg_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, name: str, split: str = 'train', download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_embedseg_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    batch_size: int,
    name: str,
    split: str = "train",
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the EmbedSeg dataloader for 3d fluorescence microscopy segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training. The data is 3d, so a shape with
            three entries is presumably expected - confirm against `torch_em.default_segmentation_dataset`.
        batch_size: The batch size for training.
        name: Name of the dataset to download.
        split: The split to use for the dataset.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_embedseg_dataset(
        path, name=name, split=split, patch_shape=patch_shape, download=download, **ds_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **loader_kwargs)
Get the EmbedSeg dataloader for 3d fluorescence microscopy segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- name: Name of the dataset to download.
- split: The split to use for the dataset.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.