torch_em.data.datasets.light_microscopy.bac_mother

The BacMother dataset contains bacteria annotations for E. coli videos. This dataset also has tracking annotations in CTC format.

The dataset is hosted on Zenodo https://doi.org/10.5281/zenodo.11237127. The dataset is from the publication https://doi.org/10.1371/journal.pcbi.1013071.

Please cite it if you use this dataset for your research.

View Source

  1"""The BacMother dataset contains bacteria annotations for E. coli videos.
  2This dataset also has tracking annotations in CTC format.
  3
  4The dataset is hosted on Zenodo https://doi.org/10.5281/zenodo.11237127.
  5The dataset is from the publication https://doi.org/10.1371/journal.pcbi.1013071.
  6
  7Please cite it if you use this dataset for your research.
  8"""
  9
 10import os
 11from glob import glob
 12from natsort import natsorted
 13from typing import List, Union, Tuple, Literal
 14
 15from torch.utils.data import Dataset, DataLoader
 16
 17import torch_em
 18
 19from .. import util
 20
 21
 22URL = "https://zenodo.org/records/11237127/files/CTC.zip"
 23CHECKSUM = "280f4cacda12094b6eafaae772ce7ea25f8ad6093d2ec2b3d381504dbea70ed3"
 24
 25
 26def get_bac_mother_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 27    """Download the BacMother dataset.
 28
 29    Args:
 30        path: Filepath to a folder where the downloaded data will be saved.
 31        download: Whether to download the data if it is not present.
 32
 33    Returns:
 34        Filepath where the dataset is stored.
 35    """
 36    data_dir = os.path.join(path, "CTC")
 37    if os.path.exists(data_dir):
 38        return data_dir
 39
 40    os.makedirs(path, exist_ok=True)
 41
 42    zip_path = os.path.join(path, "CTC.zip")
 43    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 44    util.unzip(zip_path=zip_path, dst=path)
 45
 46    return data_dir
 47
 48
 49def get_bac_mother_paths(
 50    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool = False,
 51) -> Tuple[List[str], List[str]]:
 52    """Get paths for the BacMother dataset.
 53
 54    Args:
 55        path: Filepath to a folder where the downloaded data will be saved.
 56        split: The data split to use. Either 'train', 'val' or 'test'.
 57        download: Whether to download the data if it is not present.
 58
 59    Returns:
 60        List of filepaths for the image data.
 61        List of filepaths for the label data.
 62    """
 63    data_path = get_bac_mother_data(path, download)
 64
 65    assert split in ["train", "val", "test"], f"'{split}' is not a valid data split."
 66    data_path = os.path.join(data_path, split)
 67
 68    raw_dirs = [p for p in glob(os.path.join(data_path, "*")) if not p.endswith("_GT")]
 69
 70    raw_paths, label_paths = [], []
 71    for raw_dir in raw_dirs:
 72        raw_paths.extend(natsorted(glob(os.path.join(raw_dir, "t*.tif"))))
 73        label_paths.extend(natsorted(glob(os.path.join(f"{raw_dir}_GT", "SEG", "man_seg*.tif"))))
 74
 75    assert raw_paths and len(raw_paths) == len(label_paths)
 76
 77    return raw_paths, label_paths
 78
 79
 80def get_bac_mother_dataset(
 81    path: Union[os.PathLike, str],
 82    patch_shape: Tuple[int, int],
 83    split: Literal["train", "val", "test"],
 84    download: bool = False,
 85    **kwargs
 86) -> Dataset:
 87    """Get the BacMother dataset for segmentation of bacteria.
 88
 89    Args:
 90        path: Filepath to a folder where the downloaded data will be saved.
 91        patch_shape: The patch shape to use for training.
 92        split: The data split to use. Either 'train', 'val' or 'test'.
 93        download: Whether to download the data if it is not present.
 94        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 95
 96    Returns:
 97        The segmentation dataset.
 98    """
 99    raw_paths, label_paths = get_bac_mother_paths(path, split, download)
100
101    return torch_em.default_segmentation_dataset(
102        raw_paths=raw_paths,
103        raw_key=None,
104        label_paths=label_paths,
105        label_key=None,
106        ndim=2,
107        with_channels=True,
108        is_seg_dataset=False,
109        patch_shape=patch_shape,
110    )
111
112
def get_bac_mother_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build a PyTorch DataLoader over the BacMother bacteria segmentation dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the options meant for the dataset from the ones meant for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    return torch_em.get_data_loader(
        get_bac_mother_dataset(path, patch_shape, split, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )

URL = 'https://zenodo.org/records/11237127/files/CTC.zip'

CHECKSUM = '280f4cacda12094b6eafaae772ce7ea25f8ad6093d2ec2b3d381504dbea70ed3'

def get_bac_mother_data(path: Union[os.PathLike, str], download: bool = False) -> str: View Source

def get_bac_mother_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the BacMother data is available locally and return its location.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the `CTC` folder inside `path` that holds the dataset.
    """
    extracted_root = os.path.join(path, "CTC")

    # Only fetch and unpack the archive when the extracted folder is missing.
    if not os.path.exists(extracted_root):
        os.makedirs(path, exist_ok=True)

        archive = os.path.join(path, "CTC.zip")
        util.download_source(path=archive, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive, dst=path)

    return extracted_root

Download the BacMother dataset.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
download: Whether to download the data if it is not present.

Returns:

Filepath where the dataset is stored.

def get_bac_mother_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]: View Source

def get_bac_mother_paths(
    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths for the BacMother dataset.

    Every folder in the split directory whose name does not end with `_GT` is treated as an
    image sequence. Its images are matched by `t*.tif` and its labels by `man_seg*.tif`
    inside the sibling `<sequence>_GT/SEG` folder.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If the split is invalid, if a sequence has a different number of
            images and labels, or if no images are found at all.
    """
    # Reject an invalid split before touching the disk or the network.
    assert split in ["train", "val", "test"], f"'{split}' is not a valid data split."

    data_path = os.path.join(get_bac_mother_data(path, download), split)

    # `glob` yields entries in arbitrary order; sort the sequence folders so that the
    # returned lists are reproducible across filesystems.
    raw_dirs = natsorted([p for p in glob(os.path.join(data_path, "*")) if not p.endswith("_GT")])

    raw_paths, label_paths = [], []
    for raw_dir in raw_dirs:
        seq_raw = natsorted(glob(os.path.join(raw_dir, "t*.tif")))
        seq_labels = natsorted(glob(os.path.join(f"{raw_dir}_GT", "SEG", "man_seg*.tif")))

        # Check each sequence on its own: comparing only the totals would let mismatches in
        # two sequences cancel out and silently pair images with the wrong labels.
        assert len(seq_raw) == len(seq_labels), (
            f"Found {len(seq_raw)} images but {len(seq_labels)} labels for the sequence at '{raw_dir}'."
        )

        raw_paths.extend(seq_raw)
        label_paths.extend(seq_labels)

    assert raw_paths, f"Could not find any images for the '{split}' split at '{data_path}'."

    return raw_paths, label_paths

Get paths for the BacMother dataset.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
split: The data split to use. Either 'train', 'val' or 'test'.
download: Whether to download the data if it is not present.

Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_bac_mother_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

 81def get_bac_mother_dataset(
 82    path: Union[os.PathLike, str],
 83    patch_shape: Tuple[int, int],
 84    split: Literal["train", "val", "test"],
 85    download: bool = False,
 86    **kwargs
 87) -> Dataset:
 88    """Get the BacMother dataset for segmentation of bacteria.
 89
 90    Args:
 91        path: Filepath to a folder where the downloaded data will be saved.
 92        patch_shape: The patch shape to use for training.
 93        split: The data split to use. Either 'train', 'val' or 'test'.
 94        download: Whether to download the data if it is not present.
 95        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 96
 97    Returns:
 98        The segmentation dataset.
 99    """
100    raw_paths, label_paths = get_bac_mother_paths(path, split, download)
101
102    return torch_em.default_segmentation_dataset(
103        raw_paths=raw_paths,
104        raw_key=None,
105        label_paths=label_paths,
106        label_key=None,
107        ndim=2,
108        with_channels=True,
109        is_seg_dataset=False,
110        patch_shape=patch_shape,
111    )

Get the BacMother dataset for segmentation of bacteria.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
patch_shape: The patch shape to use for training.
split: The data split to use. Either 'train', 'val' or 'test'.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_bac_mother_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_bac_mother_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build a PyTorch DataLoader over the BacMother bacteria segmentation dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the options meant for the dataset from the ones meant for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    return torch_em.get_data_loader(
        get_bac_mother_dataset(path, patch_shape, split, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )

Get the BacMother dataloader for segmentation of bacteria.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
batch_size: The batch size for training.
patch_shape: The patch shape to use for training.
split: The data split to use. Either 'train', 'val' or 'test'.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.