torch_em.data.datasets.light_microscopy.toiam

The TOIAM dataset contains annotations for microbial cell segmentation in phase-contrast microscopy images from microbial live-cell imaging (MLCI).

The dataset is located at https://doi.org/10.5281/zenodo.7260137. This dataset is from the publication https://arxiv.org/html/2411.00552v1. Please cite it if you use this dataset for your research.

  1"""The TOIAM dataset contains annotations for microbial cell segmentation in
  2phase-contrast microscopy images from microbial live-cell imaging (MLCI).
  3
  4The dataset is located at https://doi.org/10.5281/zenodo.7260137.
  5This dataset is from the publication https://arxiv.org/html/2411.00552v1.
  6Please cite it if you use this dataset for your research.
  7"""
  8
  9import os
 10from glob import glob
 11from natsort import natsorted
 12from typing import Union, Tuple, List
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
 21URL = "https://zenodo.org/records/7260137/files/ctc_format.zip"
 22CHECKSUM = "9ec73277b29f2b032037d9e07c73c428ff51456c23a5866bf214bf5a71590c31"
 23
 24
 25def get_toiam_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 26    """Download the TOIAM dataset.
 27
 28    Args:
 29        path: Filepath to a folder where the data is downloaded for further processing.
 30        download: Whether to download the data if it is not present.
 31
 32    Returns:
 33        Filepath where the data is downloaded.
 34    """
 35    data_dir = os.path.join(path, "data")
 36    if os.path.exists(data_dir):
 37        return data_dir
 38
 39    os.makedirs(path, exist_ok=True)
 40
 41    zip_path = os.path.join(path, "ctc_format.zip")
 42    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 43    util.unzip(zip_path=zip_path, dst=data_dir)
 44
 45    return data_dir
 46
 47
 48def get_toiam_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
 49    """Get paths to the TOIAM data.
 50
 51    Args:
 52        path: Filepath to a folder where the data is downloaded for further processing.
 53        download: Whether to download the data if it is not present.
 54
 55    Returns:
 56        List of filepaths for the image data.
 57        List of filepaths for the label data.
 58    """
 59    data_dir = get_toiam_data(path, download)
 60
 61    raw_paths = natsorted(glob(os.path.join(data_dir, "0*", "*.tif")))
 62    label_paths = natsorted(glob(os.path.join(data_dir, "0*_GT", "SEG", "man_*.tif")))
 63
 64    return raw_paths, label_paths
 65
 66
 67def get_toiam_dataset(
 68    path: Union[os.PathLike, str], patch_shape: Tuple[int, int], download: bool = False, **kwargs
 69) -> Dataset:
 70    """Get the TOIAM dataset for microbial cell segmentation.
 71
 72    Args:
 73        path: Filepath to a folder where the data is downloaded for further processing.
 74        patch_shape: The patch shape to use for training.
 75        download: Whether to download the data if it is not present.
 76        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 77
 78    Returns:
 79        The segmentation dataset.
 80    """
 81    raw_paths, label_paths = get_toiam_paths(path, download)
 82
 83    return torch_em.default_segmentation_dataset(
 84        raw_paths=raw_paths,
 85        raw_key=None,
 86        label_paths=label_paths,
 87        label_key=None,
 88        patch_shape=patch_shape,
 89        is_seg_dataset=False,
 90        **kwargs
 91    )
 92
 93
 94def get_toiam_loader(
 95    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], download: bool = False, **kwargs
 96) -> DataLoader:
 97    """Get the TOIAM dataloader for microbial cell segmentation.
 98
 99    Args:
100        path: Filepath to a folder where the data is downloaded for further processing.
101        batch_size: The batch size for training.
102        patch_shape: The patch shape to use for training.
103        download: Whether to download the data if it is not present.
104        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
105
106    Returns:
107        The DataLoader.
108    """
109    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
110    dataset = get_toiam_dataset(path, patch_shape, download, **ds_kwargs)
111    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL = 'https://zenodo.org/records/7260137/files/ctc_format.zip'
CHECKSUM = '9ec73277b29f2b032037d9e07c73c428ff51456c23a5866bf214bf5a71590c31'
def get_toiam_data(path: Union[os.PathLike, str], download: bool = False) -> str:
26def get_toiam_data(path: Union[os.PathLike, str], download: bool = False) -> str:
27    """Download the TOIAM dataset.
28
29    Args:
30        path: Filepath to a folder where the data is downloaded for further processing.
31        download: Whether to download the data if it is not present.
32
33    Returns:
34        Filepath where the data is downloaded.
35    """
36    data_dir = os.path.join(path, "data")
37    if os.path.exists(data_dir):
38        return data_dir
39
40    os.makedirs(path, exist_ok=True)
41
42    zip_path = os.path.join(path, "ctc_format.zip")
43    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
44    util.unzip(zip_path=zip_path, dst=data_dir)
45
46    return data_dir

Download the TOIAM dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def get_toiam_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
49def get_toiam_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
50    """Get paths to the TOIAM data.
51
52    Args:
53        path: Filepath to a folder where the data is downloaded for further processing.
54        download: Whether to download the data if it is not present.
55
56    Returns:
57        List of filepaths for the image data.
58        List of filepaths for the label data.
59    """
60    data_dir = get_toiam_data(path, download)
61
62    raw_paths = natsorted(glob(os.path.join(data_dir, "0*", "*.tif")))
63    label_paths = natsorted(glob(os.path.join(data_dir, "0*_GT", "SEG", "man_*.tif")))
64
65    return raw_paths, label_paths

Get paths to the TOIAM data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

A tuple of two lists: the filepaths for the image data and the filepaths for the label data.

def get_toiam_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
68def get_toiam_dataset(
69    path: Union[os.PathLike, str], patch_shape: Tuple[int, int], download: bool = False, **kwargs
70) -> Dataset:
71    """Get the TOIAM dataset for microbial cell segmentation.
72
73    Args:
74        path: Filepath to a folder where the data is downloaded for further processing.
75        patch_shape: The patch shape to use for training.
76        download: Whether to download the data if it is not present.
77        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
78
79    Returns:
80        The segmentation dataset.
81    """
82    raw_paths, label_paths = get_toiam_paths(path, download)
83
84    return torch_em.default_segmentation_dataset(
85        raw_paths=raw_paths,
86        raw_key=None,
87        label_paths=label_paths,
88        label_key=None,
89        patch_shape=patch_shape,
90        is_seg_dataset=False,
91        **kwargs
92    )

Get the TOIAM dataset for microbial cell segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_toiam_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
 95def get_toiam_loader(
 96    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], download: bool = False, **kwargs
 97) -> DataLoader:
 98    """Get the TOIAM dataloader for microbial cell segmentation.
 99
100    Args:
101        path: Filepath to a folder where the data is downloaded for further processing.
102        batch_size: The batch size for training.
103        patch_shape: The patch shape to use for training.
104        download: Whether to download the data if it is not present.
105        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
106
107    Returns:
108        The DataLoader.
109    """
110    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
111    dataset = get_toiam_dataset(path, patch_shape, download, **ds_kwargs)
112    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)

Get the TOIAM dataloader for microbial cell segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.