torch_em.data.datasets.light_microscopy.bac_mother
The BacMother dataset contains bacteria annotations for E. coli videos. This dataset also has tracking annotations in CTC format.
The dataset is hosted on Zenodo https://doi.org/10.5281/zenodo.11237127. The dataset is from the publication https://doi.org/10.1371/journal.pcbi.1013071.
Please cite it if you use this dataset for your research.
"""The BacMother dataset contains bacteria annotations for E. coli videos.
This dataset also has tracking annotations in CTC format.

The dataset is hosted on Zenodo https://doi.org/10.5281/zenodo.11237127.
The dataset is from the publication https://doi.org/10.1371/journal.pcbi.1013071.

Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import List, Union, Tuple, Literal

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://zenodo.org/records/11237127/files/CTC.zip"
CHECKSUM = "280f4cacda12094b6eafaae772ce7ea25f8ad6093d2ec2b3d381504dbea70ed3"


def get_bac_mother_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the BacMother dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the dataset is stored.
    """
    data_dir = os.path.join(path, "CTC")
    # An existing extracted folder is taken as-is; nothing is downloaded again.
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "CTC.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def get_bac_mother_paths(
    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths for the BacMother dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    # Check the split before touching the data so that a typo cannot trigger a download.
    assert split in ["train", "val", "test"], f"'{split}' is not a valid data split."

    data_path = os.path.join(get_bac_mother_data(path, download), split)

    # glob returns entries in arbitrary order; sort the sequence folders for a reproducible result.
    raw_dirs = natsorted([p for p in glob(os.path.join(data_path, "*")) if not p.endswith("_GT")])

    raw_paths, label_paths = [], []
    for raw_dir in raw_dirs:
        # The labels of a sequence folder are looked up in the sibling '<folder>_GT/SEG' directory.
        raw_paths.extend(natsorted(glob(os.path.join(raw_dir, "t*.tif"))))
        label_paths.extend(natsorted(glob(os.path.join(f"{raw_dir}_GT", "SEG", "man_seg*.tif"))))

    assert raw_paths and len(raw_paths) == len(label_paths), \
        f"Found {len(raw_paths)} images and {len(label_paths)} labels in '{data_path}'."

    return raw_paths, label_paths


def get_bac_mother_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the BacMother dataset for segmentation of bacteria.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_bac_mother_paths(path, split, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        ndim=2,
        with_channels=True,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        # Forward the caller's options; they were previously accepted but silently dropped.
        **kwargs
    )


def get_bac_mother_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the BacMother dataloader for segmentation of bacteria.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_bac_mother_dataset(path, patch_shape, split, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://zenodo.org/records/11237127/files/CTC.zip'
CHECKSUM =
'280f4cacda12094b6eafaae772ce7ea25f8ad6093d2ec2b3d381504dbea70ed3'
def
get_bac_mother_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_bac_mother_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the BacMother data is available locally and return its location.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the `CTC` folder inside `path` where the dataset is stored.
    """
    extracted_dir = os.path.join(path, "CTC")

    # The archive is only fetched and unpacked when the extracted folder is missing.
    if not os.path.exists(extracted_dir):
        os.makedirs(path, exist_ok=True)
        archive_path = os.path.join(path, "CTC.zip")
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive_path, dst=path)

    return extracted_dir
Download the BacMother dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the dataset is stored.
def
get_bac_mother_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> Tuple[List[str], List[str]]:
def get_bac_mother_paths(
    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths for the BacMother dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If `split` is not a valid split name, if no images are found,
            or if the number of images and labels differs.
    """
    # Check the split before touching the data so that a typo cannot trigger a download.
    assert split in ["train", "val", "test"], f"'{split}' is not a valid data split."

    split_dir = os.path.join(get_bac_mother_data(path, download), split)

    # glob returns entries in arbitrary order; sort the sequence folders for a reproducible result.
    raw_dirs = natsorted([p for p in glob(os.path.join(split_dir, "*")) if not p.endswith("_GT")])

    raw_paths, label_paths = [], []
    for raw_dir in raw_dirs:
        # The labels of a sequence folder are looked up in the sibling '<folder>_GT/SEG' directory.
        raw_paths.extend(natsorted(glob(os.path.join(raw_dir, "t*.tif"))))
        label_paths.extend(natsorted(glob(os.path.join(f"{raw_dir}_GT", "SEG", "man_seg*.tif"))))

    assert raw_paths and len(raw_paths) == len(label_paths), \
        f"Found {len(raw_paths)} images and {len(label_paths)} labels in '{split_dir}'."

    return raw_paths, label_paths
Get paths for the BacMother dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split to use. Either 'train', 'val' or 'test'.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_bac_mother_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_bac_mother_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the BacMother dataset for segmentation of bacteria.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_bac_mother_paths(path, split, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        ndim=2,
        with_channels=True,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        # Forward the caller's options; they were previously accepted but silently dropped.
        **kwargs
    )
Get the BacMother dataset for segmentation of bacteria.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train', 'val' or 'test'.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_bac_mother_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_bac_mother_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "test"],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build a dataloader over the BacMother bacteria segmentation dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the options meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    return torch_em.get_data_loader(
        get_bac_mother_dataset(path, patch_shape, split, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs,
    )
Get the BacMother dataloader for segmentation of bacteria.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train', 'val' or 'test'.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.