torch_em.data.datasets.light_microscopy.bac_mother

The BacMother dataset contains bacteria annotations for E. coli videos. This dataset also has tracking annotations in CTC format.

The dataset is hosted on Zenodo https://doi.org/10.5281/zenodo.11237127. The dataset is from the publication https://doi.org/10.1371/journal.pcbi.1013071.

Please cite it if you use this dataset for your research.

  1"""The BacMother dataset contains bacteria annotations for E. Coli videos.
  2This dataset also has tracking annotations in CTC format.
  3
  4The dataset is hosted on Zenodo https://doi.org/10.5281/zenodo.11237127.
  5The dataset is from the publication https://doi.org/10.1371/journal.pcbi.1013071.
  6
  7Please cite it if you use this dataset for your research.
  8"""
  9
 10import os
 11from glob import glob
 12from natsort import natsorted
 13from typing import List, Union, Tuple, Literal
 14
 15from torch.utils.data import Dataset, DataLoader
 16
 17import torch_em
 18
 19from .. import util
 20
 21
 # Download link for the zipped dataset (`CTC.zip`) in the Zenodo record of the BacMother data.
 22URL = "https://zenodo.org/records/11237127/files/CTC.zip"
 # Checksum handed to `util.download_source` together with the archive URL.
 # NOTE(review): 64 hex characters, presumably a SHA-256 digest - confirm against `util.download_source`.
 23CHECKSUM = "280f4cacda12094b6eafaae772ce7ea25f8ad6093d2ec2b3d381504dbea70ed3"
 24
 25
 26def get_bac_mother_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 27    """Download the BacMother dataset.
 28
 29    Args:
 30        path: Filepath to a folder where the downloaded data will be saved.
 31        download: Whether to download the data if it is not present.
 32
 33    Returns:
 34        Filepath where the dataset is stored.
 35    """
 36    data_dir = os.path.join(path, "CTC")
 37    if os.path.exists(data_dir):
 38        return data_dir
 39
 40    os.makedirs(path, exist_ok=True)
 41
 42    zip_path = os.path.join(path, "CTC.zip")
 43    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 44    util.unzip(zip_path=zip_path, dst=path)
 45
 46    return data_dir
 47
 48
 49def get_bac_mother_paths(
 50    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool = False,
 51) -> Tuple[List[str], List[str]]:
 52    """Get paths for the BacMother dataset.
 53
 54    Args:
 55        path: Filepath to a folder where the downloaded data will be saved.
 56        split: The data split to use. Either 'train', 'val' or 'test'.
 57        download: Whether to download the data if it is not present.
 58
 59    Returns:
 60        List of filepaths for the image data.
 61        List of filepaths for the label data.
 62    """
 63    data_path = get_bac_mother_data(path, download)
 64
 65    assert split in ["train", "val", "test"], f"'{split}' is not a valid data split."
 66    data_path = os.path.join(data_path, split)
 67
 68    raw_dirs = [p for p in glob(os.path.join(data_path, "*")) if not p.endswith("_GT")]
 69
 70    raw_paths, label_paths = [], []
 71    for raw_dir in raw_dirs:
 72        raw_paths.extend(natsorted(glob(os.path.join(raw_dir, "t*.tif"))))
 73        label_paths.extend(natsorted(glob(os.path.join(f"{raw_dir}_GT", "SEG", "man_seg*.tif"))))
 74
 75    assert raw_paths and len(raw_paths) == len(label_paths)
 76
 77    return raw_paths, label_paths
 78
 79
 80def get_bac_mother_dataset(
 81    path: Union[os.PathLike, str],
 82    patch_shape: Tuple[int, int],
 83    split: Literal["train", "val", "test"],
 84    download: bool = False,
 85    **kwargs
 86) -> Dataset:
 87    """Get the BacMother dataset for segmentation of bacteria.
 88
 89    Args:
 90        path: Filepath to a folder where the downloaded data will be saved.
 91        patch_shape: The patch shape to use for training.
 92        split: The data split to use. Either 'train', 'val' or 'test'.
 93        download: Whether to download the data if it is not present.
 94        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 95
 96    Returns:
 97        The segmentation dataset.
 98    """
 99    raw_paths, label_paths = get_bac_mother_paths(path, split, download)
100
101    return torch_em.default_segmentation_dataset(
102        raw_paths=raw_paths,
103        raw_key=None,
104        label_paths=label_paths,
105        label_key=None,
106        ndim=2,
107        with_channels=True,
108        is_seg_dataset=False,
109        patch_shape=patch_shape,
110    )
111
112
def get_bac_mother_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create a dataloader for bacteria segmentation on the BacMother dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the ones meant for the dataloader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    return torch_em.get_data_loader(
        get_bac_mother_dataset(path, patch_shape, split, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )
# Download link for the zipped dataset (`CTC.zip`) in the Zenodo record of the BacMother data.
URL = 'https://zenodo.org/records/11237127/files/CTC.zip'
# Checksum handed to `util.download_source` together with the archive URL.
# NOTE(review): 64 hex characters, presumably a SHA-256 digest - confirm against `util.download_source`.
CHECKSUM = '280f4cacda12094b6eafaae772ce7ea25f8ad6093d2ec2b3d381504dbea70ed3'
def get_bac_mother_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_bac_mother_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the BacMother data is available locally and return its location.

    The data is expected in the folder `CTC` inside of `path`. If that folder is missing,
    the archive `CTC.zip` is fetched with `util.download_source` and extracted into `path`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the dataset is stored.
    """
    extracted_root = os.path.join(path, "CTC")

    # Only fetch and extract the archive when the extracted folder is not there yet.
    if not os.path.exists(extracted_root):
        os.makedirs(path, exist_ok=True)

        archive = os.path.join(path, "CTC.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return extracted_root

Download the BacMother dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the dataset is stored.

def get_bac_mother_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]:
def get_bac_mother_paths(
    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths for the BacMother dataset.

    Every sequence folder `<split>/<name>` contributes its frames `t*.tif`, and the matching
    folder `<split>/<name>_GT/SEG` contributes the annotations `man_seg*.tif`.
    Sequences and the files within each sequence are returned in natural sort order,
    so that the i-th image path belongs to the i-th label path.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If the split is invalid, if a sequence has a different number of
            images and labels, or if no images are found for the split.
    """
    # Check the split first, so that a typo does not trigger a download before failing.
    assert split in ["train", "val", "test"], f"'{split}' is not a valid data split."

    data_path = os.path.join(get_bac_mother_data(path, download), split)

    # The order returned by glob is arbitrary. Sorting the sequence folders makes the
    # order of the returned paths reproducible across machines and filesystems.
    raw_dirs = natsorted(
        p for p in glob(os.path.join(data_path, "*")) if os.path.isdir(p) and not p.endswith("_GT")
    )

    raw_paths, label_paths = [], []
    for raw_dir in raw_dirs:
        sequence_raw = natsorted(glob(os.path.join(raw_dir, "t*.tif")))
        sequence_labels = natsorted(glob(os.path.join(f"{raw_dir}_GT", "SEG", "man_seg*.tif")))

        # Compare the counts per sequence: comparing only the totals would let mismatches
        # in two sequences cancel out and silently pair images with the wrong labels.
        assert len(sequence_raw) == len(sequence_labels), (
            f"Found {len(sequence_raw)} images but {len(sequence_labels)} labels for '{raw_dir}'."
        )

        raw_paths.extend(sequence_raw)
        label_paths.extend(sequence_labels)

    assert raw_paths, f"Could not find any images for the split '{split}' in '{data_path}'."

    return raw_paths, label_paths

Get paths for the BacMother dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_bac_mother_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 81def get_bac_mother_dataset(
 82    path: Union[os.PathLike, str],
 83    patch_shape: Tuple[int, int],
 84    split: Literal["train", "val", "test"],
 85    download: bool = False,
 86    **kwargs
 87) -> Dataset:
 88    """Get the BacMother dataset for segmentation of bacteria.
 89
 90    Args:
 91        path: Filepath to a folder where the downloaded data will be saved.
 92        patch_shape: The patch shape to use for training.
 93        split: The data split to use. Either 'train', 'val' or 'test'.
 94        download: Whether to download the data if it is not present.
 95        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 96
 97    Returns:
 98        The segmentation dataset.
 99    """
100    raw_paths, label_paths = get_bac_mother_paths(path, split, download)
101
102    return torch_em.default_segmentation_dataset(
103        raw_paths=raw_paths,
104        raw_key=None,
105        label_paths=label_paths,
106        label_key=None,
107        ndim=2,
108        with_channels=True,
109        is_seg_dataset=False,
110        patch_shape=patch_shape,
111    )

Get the BacMother dataset for segmentation of bacteria.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_bac_mother_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_bac_mother_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create a dataloader for bacteria segmentation on the BacMother dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the ones meant for the dataloader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    return torch_em.get_data_loader(
        get_bac_mother_dataset(path, patch_shape, split, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )

Get the BacMother dataloader for segmentation of bacteria.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train', 'val' or 'test'.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.