torch_em.data.datasets.light_microscopy.microbeseg
The microbeSEG dataset contains annotations for bacterial cell instance segmentation in phase-contrast microscopy images of B. subtilis and E. coli.
The dataset is located at https://zenodo.org/records/6497715. This dataset is from the publication https://doi.org/10.1371/journal.pone.0277601. Please cite it if you use this dataset in your research.
"""The microbeSEG dataset contains annotations for bacterial cell instance segmentation
in phase-contrast microscopy images of B. subtilis and E. coli.

The dataset is located at https://zenodo.org/records/6497715.
This dataset is from the publication https://doi.org/10.1371/journal.pone.0277601.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Literal, Tuple, Optional, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://zenodo.org/records/6497715/files/microbeSEG_dataset.zip"
# No checksum is recorded for the archive; `None` is forwarded to `util.download_source`.
CHECKSUM = None

# Names of the annotation sub-folders and of the split sub-folders inside each of them.
ANNOTATION_TYPES = ["30min-man", "30min-man_15min-pre"]
SPLITS = ["train", "val", "test", "complete"]


def get_microbeseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the microbeSEG dataset.

    If the folder `microbeSEG_dataset` already exists below `path`, nothing is downloaded.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the extracted data directory.
    """
    data_dir = os.path.join(path, "microbeSEG_dataset")
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "microbeSEG_dataset.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def get_microbeseg_paths(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test", "complete"] = "train",
    annotation_type: Literal["30min-man", "30min-man_15min-pre"] = "30min-man_15min-pre",
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the microbeSEG data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
        annotation_type: The annotation type. Either '30min-man' (manual only)
            or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` or `annotation_type` is not one of the supported values.
        FileNotFoundError: If the folder for the requested annotation type and split does not exist.
        RuntimeError: If no images are found or the number of images and masks differs.
    """
    # Explicit exceptions instead of `assert`, so the checks are not stripped under `python -O`.
    if split not in SPLITS:
        raise ValueError(f"'{split}' is not a valid split. Choose from {SPLITS}.")
    if annotation_type not in ANNOTATION_TYPES:
        raise ValueError(
            f"'{annotation_type}' is not a valid annotation type. Choose from {ANNOTATION_TYPES}."
        )

    data_dir = get_microbeseg_data(path, download)

    split_dir = os.path.join(data_dir, annotation_type, split)
    if not os.path.exists(split_dir):
        raise FileNotFoundError(f"Split directory not found: {split_dir}")

    # Natural sorting of both lists is what pairs each image with its mask.
    image_paths = natsorted(glob(os.path.join(split_dir, "img_*.tif")))
    seg_paths = natsorted(glob(os.path.join(split_dir, "mask_*.tif")))
    if not image_paths or len(image_paths) != len(seg_paths):
        raise RuntimeError(
            f"Expected a non-empty, equal number of images and masks in '{split_dir}', "
            f"but found {len(image_paths)} image(s) and {len(seg_paths)} mask(s)."
        )

    return image_paths, seg_paths


def get_microbeseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test", "complete"] = "train",
    annotation_type: Literal["30min-man", "30min-man_15min-pre"] = "30min-man_15min-pre",
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the microbeSEG dataset for bacterial cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
        annotation_type: The annotation type. Either '30min-man' (manual only)
            or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, seg_paths = get_microbeseg_paths(path, split, annotation_type, download)

    kwargs = util.ensure_transforms(ndim=2, **kwargs)
    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, offsets=offsets, boundaries=boundaries, binary=binary
    )

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=seg_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        ndim=2,
        **kwargs
    )


def get_microbeseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test", "complete"] = "train",
    annotation_type: Literal["30min-man", "30min-man_15min-pre"] = "30min-man_15min-pre",
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the microbeSEG dataloader for bacterial cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
        annotation_type: The annotation type. Either '30min-man' (manual only)
            or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_microbeseg_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        annotation_type=annotation_type,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        download=download,
        **ds_kwargs,
    )
    return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
URL =
'https://zenodo.org/records/6497715/files/microbeSEG_dataset.zip'
CHECKSUM =
None
ANNOTATION_TYPES =
['30min-man', '30min-man_15min-pre']
SPLITS =
['train', 'val', 'test', 'complete']
def
get_microbeseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_microbeseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the microbeSEG data is available locally and return its location.

    The data is expected in the sub-folder `microbeSEG_dataset` of `path`. If that
    sub-folder already exists it is returned as is; otherwise the zip archive is
    requested via `util.download_source` and unpacked into `path` via `util.unzip`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the extracted data directory.
    """
    extracted_root = os.path.join(path, "microbeSEG_dataset")

    # Only touch the network / file system when the extracted folder is missing.
    if not os.path.exists(extracted_root):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "microbeSEG_dataset.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return extracted_root
Download the microbeSEG dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the extracted data directory.
def
get_microbeseg_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test', 'complete'] = 'train', annotation_type: Literal['30min-man', '30min-man_15min-pre'] = '30min-man_15min-pre', download: bool = False) -> Tuple[List[str], List[str]]:
def get_microbeseg_paths(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test", "complete"] = "train",
    annotation_type: Literal["30min-man", "30min-man_15min-pre"] = "30min-man_15min-pre",
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the microbeSEG data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
        annotation_type: The annotation type. Either '30min-man' (manual only)
            or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` or `annotation_type` is not one of the supported values.
        FileNotFoundError: If the folder for the requested annotation type and split does not exist.
        RuntimeError: If no images are found or the number of images and masks differs.
    """
    # Explicit exceptions instead of `assert`, so the checks are not stripped under `python -O`.
    if split not in SPLITS:
        raise ValueError(f"'{split}' is not a valid split. Choose from {SPLITS}.")
    if annotation_type not in ANNOTATION_TYPES:
        raise ValueError(
            f"'{annotation_type}' is not a valid annotation type. Choose from {ANNOTATION_TYPES}."
        )

    data_dir = get_microbeseg_data(path, download)

    split_dir = os.path.join(data_dir, annotation_type, split)
    if not os.path.exists(split_dir):
        raise FileNotFoundError(f"Split directory not found: {split_dir}")

    # Natural sorting of both lists is what pairs each image with its mask.
    image_paths = natsorted(glob(os.path.join(split_dir, "img_*.tif")))
    seg_paths = natsorted(glob(os.path.join(split_dir, "mask_*.tif")))
    if not image_paths or len(image_paths) != len(seg_paths):
        raise RuntimeError(
            f"Expected a non-empty, equal number of images and masks in '{split_dir}', "
            f"but found {len(image_paths)} image(s) and {len(seg_paths)} mask(s)."
        )

    return image_paths, seg_paths
Get paths to the microbeSEG data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
- annotation_type: The annotation type. Either '30min-man' (manual only) or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_microbeseg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test', 'complete'] = 'train', annotation_type: Literal['30min-man', '30min-man_15min-pre'] = '30min-man_15min-pre', offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_microbeseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test", "complete"] = "train",
    annotation_type: Literal["30min-man", "30min-man_15min-pre"] = "30min-man_15min-pre",
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the microbeSEG segmentation dataset for bacterial cells.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
        annotation_type: The annotation type. Either '30min-man' (manual only)
            or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # Resolve the image and mask files first; this also triggers the download if needed.
    raw_files, label_files = get_microbeseg_paths(
        path=path, split=split, annotation_type=annotation_type, download=download
    )

    # Prepare the dataset arguments: 2d transforms, then the requested label transform.
    dataset_kwargs = util.ensure_transforms(ndim=2, **kwargs)
    dataset_kwargs, _ = util.add_instance_label_transform(
        dataset_kwargs,
        add_binary_target=True,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
    )

    # The data is passed as lists of image files, hence no internal keys.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_files,
        raw_key=None,
        label_paths=label_files,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        ndim=2,
        **dataset_kwargs
    )
    return dataset
Get the microbeSEG dataset for bacterial cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
- annotation_type: The annotation type. Either '30min-man' (manual only) or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
def
get_microbeseg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test', 'complete'] = 'train', annotation_type: Literal['30min-man', '30min-man_15min-pre'] = '30min-man_15min-pre', offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_microbeseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test", "complete"] = "train",
    annotation_type: Literal["30min-man", "30min-man_15min-pre"] = "30min-man_15min-pre",
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the microbeSEG dataloader for bacterial cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
        annotation_type: The annotation type. Either '30min-man' (manual only)
            or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the extra arguments into those for the dataset and those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )

    microbeseg_ds = get_microbeseg_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        annotation_type=annotation_type,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        download=download,
        **dataset_kwargs,
    )

    loader = torch_em.get_data_loader(
        dataset=microbeseg_ds, batch_size=batch_size, **dataloader_kwargs
    )
    return loader
Get the microbeSEG dataloader for bacterial cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
- annotation_type: The annotation type. Either '30min-man' (manual only) or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.