torch_em.data.datasets.light_microscopy.microbeseg

The microbeSEG dataset contains annotations for bacterial cell instance segmentation in phase-contrast microscopy images of B. subtilis and E. coli.

The dataset is located at https://zenodo.org/records/6497715. This dataset is from the publication https://doi.org/10.1371/journal.pone.0277601. Please cite it if you use this dataset in your research.

  1"""The microbeSEG dataset contains annotations for bacterial cell instance segmentation
  2in phase-contrast microscopy images of B. subtilis and E. coli.
  3
  4The dataset is located at https://zenodo.org/records/6497715.
  5This dataset is from the publication https://doi.org/10.1371/journal.pone.0277601.
  6Please cite it if you use this dataset in your research.
  7"""
  8
  9import os
 10from glob import glob
 11from natsort import natsorted
 12from typing import Union, Literal, Tuple, Optional, List
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
 21URL = "https://zenodo.org/records/6497715/files/microbeSEG_dataset.zip"
 22CHECKSUM = None
 23
 24ANNOTATION_TYPES = ["30min-man", "30min-man_15min-pre"]
 25SPLITS = ["train", "val", "test", "complete"]
 26
 27
 28def get_microbeseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 29    """Download the microbeSEG dataset.
 30
 31    Args:
 32        path: Filepath to a folder where the downloaded data will be saved.
 33        download: Whether to download the data if it is not present.
 34
 35    Returns:
 36        The filepath to the extracted data directory.
 37    """
 38    data_dir = os.path.join(path, "microbeSEG_dataset")
 39    if os.path.exists(data_dir):
 40        return data_dir
 41
 42    os.makedirs(path, exist_ok=True)
 43    zip_path = os.path.join(path, "microbeSEG_dataset.zip")
 44    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 45    util.unzip(zip_path=zip_path, dst=path)
 46
 47    return data_dir
 48
 49
 50def get_microbeseg_paths(
 51    path: Union[os.PathLike, str],
 52    split: Literal["train", "val", "test", "complete"] = "train",
 53    annotation_type: Literal["30min-man", "30min-man_15min-pre"] = "30min-man_15min-pre",
 54    download: bool = False,
 55) -> Tuple[List[str], List[str]]:
 56    """Get paths to the microbeSEG data.
 57
 58    Args:
 59        path: Filepath to a folder where the downloaded data will be saved.
 60        split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
 61        annotation_type: The annotation type. Either '30min-man' (manual only)
 62            or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
 63        download: Whether to download the data if it is not present.
 64
 65    Returns:
 66        List of filepaths for the image data.
 67        List of filepaths for the label data.
 68    """
 69    assert split in SPLITS, f"'{split}' is not a valid split. Choose from {SPLITS}."
 70    assert annotation_type in ANNOTATION_TYPES, \
 71        f"'{annotation_type}' is not a valid annotation type. Choose from {ANNOTATION_TYPES}."
 72
 73    data_dir = get_microbeseg_data(path, download)
 74
 75    split_dir = os.path.join(data_dir, annotation_type, split)
 76    assert os.path.exists(split_dir), f"Split directory not found: {split_dir}"
 77
 78    image_paths = natsorted(glob(os.path.join(split_dir, "img_*.tif")))
 79    seg_paths = natsorted(glob(os.path.join(split_dir, "mask_*.tif")))
 80    assert len(image_paths) == len(seg_paths) and len(image_paths) > 0
 81
 82    return image_paths, seg_paths
 83
 84
 85def get_microbeseg_dataset(
 86    path: Union[os.PathLike, str],
 87    patch_shape: Tuple[int, int],
 88    split: Literal["train", "val", "test", "complete"] = "train",
 89    annotation_type: Literal["30min-man", "30min-man_15min-pre"] = "30min-man_15min-pre",
 90    offsets: Optional[List[List[int]]] = None,
 91    boundaries: bool = False,
 92    binary: bool = False,
 93    download: bool = False,
 94    **kwargs
 95) -> Dataset:
 96    """Get the microbeSEG dataset for bacterial cell segmentation.
 97
 98    Args:
 99        path: Filepath to a folder where the downloaded data will be saved.
100        patch_shape: The patch shape to use for training.
101        split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
102        annotation_type: The annotation type. Either '30min-man' (manual only)
103            or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
104        offsets: Offset values for affinity computation used as target.
105        boundaries: Whether to compute boundaries as the target.
106        binary: Whether to use a binary segmentation target.
107        download: Whether to download the data if it is not present.
108        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
109
110    Returns:
111        The segmentation dataset.
112    """
113    image_paths, seg_paths = get_microbeseg_paths(path, split, annotation_type, download)
114
115    kwargs = util.ensure_transforms(ndim=2, **kwargs)
116    kwargs, _ = util.add_instance_label_transform(
117        kwargs, add_binary_target=True, offsets=offsets, boundaries=boundaries, binary=binary
118    )
119
120    return torch_em.default_segmentation_dataset(
121        raw_paths=image_paths,
122        raw_key=None,
123        label_paths=seg_paths,
124        label_key=None,
125        patch_shape=patch_shape,
126        is_seg_dataset=False,
127        ndim=2,
128        **kwargs
129    )
130
131
def get_microbeseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test", "complete"] = "train",
    annotation_type: Literal["30min-man", "30min-man_15min-pre"] = "30min-man_15min-pre",
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the microbeSEG dataloader for bacterial cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
        annotation_type: The annotation type. Either '30min-man' (manual only)
            or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the ones for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    microbeseg_dataset = get_microbeseg_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        annotation_type=annotation_type,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        download=download,
        **dataset_kwargs,
    )

    loader = torch_em.get_data_loader(dataset=microbeseg_dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader
URL = 'https://zenodo.org/records/6497715/files/microbeSEG_dataset.zip'
CHECKSUM = None
ANNOTATION_TYPES = ['30min-man', '30min-man_15min-pre']
SPLITS = ['train', 'val', 'test', 'complete']
def get_microbeseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_microbeseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download and extract the microbeSEG dataset if it is not available yet.

    The data is expected in the sub-folder 'microbeSEG_dataset' of `path`.
    If that folder already exists, it is returned directly and neither the
    download nor the extraction is triggered.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the extracted data directory.
    """
    extracted_root = os.path.join(path, "microbeSEG_dataset")

    if not os.path.exists(extracted_root):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "microbeSEG_dataset.zip")
        # The `download` flag is forwarded as-is; how a missing file is handled
        # when it is False is decided by `util.download_source`.
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return extracted_root

Download and extract the microbeSEG dataset, unless the extracted data folder already exists.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the extracted data directory.

def get_microbeseg_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test', 'complete'] = 'train', annotation_type: Literal['30min-man', '30min-man_15min-pre'] = '30min-man_15min-pre', download: bool = False) -> Tuple[List[str], List[str]]:
def get_microbeseg_paths(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test", "complete"] = "train",
    annotation_type: Literal["30min-man", "30min-man_15min-pre"] = "30min-man_15min-pre",
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the microbeSEG data.

    Images ('img_*.tif') and masks ('mask_*.tif') are collected from the folder
    '<data_dir>/<annotation_type>/<split>' and naturally sorted independently of
    each other; image and mask at the same list position are treated as a pair.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
        annotation_type: The annotation type. Either '30min-man' (manual only)
            or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If `split` or `annotation_type` is invalid, if the split
            directory does not exist, if no images are found or if the number
            of images and masks differs.
    """
    assert split in SPLITS, f"'{split}' is not a valid split. Choose from {SPLITS}."
    assert annotation_type in ANNOTATION_TYPES, \
        f"'{annotation_type}' is not a valid annotation type. Choose from {ANNOTATION_TYPES}."

    data_dir = get_microbeseg_data(path, download)

    split_dir = os.path.join(data_dir, annotation_type, split)
    assert os.path.exists(split_dir), f"Split directory not found: {split_dir}"

    image_paths = natsorted(glob(os.path.join(split_dir, "img_*.tif")))
    seg_paths = natsorted(glob(os.path.join(split_dir, "mask_*.tif")))

    # Report the two failure modes separately so that an empty or incomplete
    # download can be told apart from the error message alone.
    assert len(image_paths) > 0, f"No images matching 'img_*.tif' found in {split_dir}"
    assert len(image_paths) == len(seg_paths), \
        f"Found {len(image_paths)} images but {len(seg_paths)} masks in {split_dir}"

    return image_paths, seg_paths

Get paths to the microbeSEG data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
  • annotation_type: The annotation type. Either '30min-man' (manual only) or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
  • download: Whether to download the data if it is not present.
Returns:

A tuple of two lists: the filepaths for the image data and the filepaths for the label data.

def get_microbeseg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test', 'complete'] = 'train', annotation_type: Literal['30min-man', '30min-man_15min-pre'] = '30min-man_15min-pre', offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 86def get_microbeseg_dataset(
 87    path: Union[os.PathLike, str],
 88    patch_shape: Tuple[int, int],
 89    split: Literal["train", "val", "test", "complete"] = "train",
 90    annotation_type: Literal["30min-man", "30min-man_15min-pre"] = "30min-man_15min-pre",
 91    offsets: Optional[List[List[int]]] = None,
 92    boundaries: bool = False,
 93    binary: bool = False,
 94    download: bool = False,
 95    **kwargs
 96) -> Dataset:
 97    """Get the microbeSEG dataset for bacterial cell segmentation.
 98
 99    Args:
100        path: Filepath to a folder where the downloaded data will be saved.
101        patch_shape: The patch shape to use for training.
102        split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
103        annotation_type: The annotation type. Either '30min-man' (manual only)
104            or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
105        offsets: Offset values for affinity computation used as target.
106        boundaries: Whether to compute boundaries as the target.
107        binary: Whether to use a binary segmentation target.
108        download: Whether to download the data if it is not present.
109        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
110
111    Returns:
112        The segmentation dataset.
113    """
114    image_paths, seg_paths = get_microbeseg_paths(path, split, annotation_type, download)
115
116    kwargs = util.ensure_transforms(ndim=2, **kwargs)
117    kwargs, _ = util.add_instance_label_transform(
118        kwargs, add_binary_target=True, offsets=offsets, boundaries=boundaries, binary=binary
119    )
120
121    return torch_em.default_segmentation_dataset(
122        raw_paths=image_paths,
123        raw_key=None,
124        label_paths=seg_paths,
125        label_key=None,
126        patch_shape=patch_shape,
127        is_seg_dataset=False,
128        ndim=2,
129        **kwargs
130    )

Get the microbeSEG dataset for bacterial cell segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
  • annotation_type: The annotation type. Either '30min-man' (manual only) or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_microbeseg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test', 'complete'] = 'train', annotation_type: Literal['30min-man', '30min-man_15min-pre'] = '30min-man_15min-pre', offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_microbeseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test", "complete"] = "train",
    annotation_type: Literal["30min-man", "30min-man_15min-pre"] = "30min-man_15min-pre",
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the microbeSEG dataloader for bacterial cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
        annotation_type: The annotation type. Either '30min-man' (manual only)
            or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the ones for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    microbeseg_dataset = get_microbeseg_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        annotation_type=annotation_type,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        download=download,
        **dataset_kwargs,
    )

    loader = torch_em.get_data_loader(dataset=microbeseg_dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader

Get the microbeSEG dataloader for bacterial cell segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. One of 'train', 'val', 'test' or 'complete'.
  • annotation_type: The annotation type. Either '30min-man' (manual only) or '30min-man_15min-pre' (manual + pre-labeling correction, more data).
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.