torch_em.data.datasets.light_microscopy.yeastcellseg
The YeastCellSeg dataset contains annotations for yeast cell segmentation in 2D bright field microscopy images.
The dataset provides 15 images of 1024x1024 pixels with binary cell body annotations. Instance segmentation labels are derived via connected components.
The dataset is from the publication https://doi.org/10.1109/ISBI.2014.6868107. Please cite it if you use this dataset in your research.
"""The YeastCellSeg dataset contains annotations for yeast cell segmentation
in 2D bright field microscopy images.

The dataset provides 15 images of 1024x1024 pixels with binary cell body annotations.
Instance segmentation labels are derived via connected components.

The dataset is from the publication https://doi.org/10.1109/ISBI.2014.6868107.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from typing import Union, Literal, Tuple, List

import numpy as np
import imageio.v3 as imageio

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


BASE_URL = "https://zenodo.org/records/344879/files"
_FILENAMES = [f"DS01_{i:02d}" for i in range(1, 16)]


def _create_h5_data(path, raw_dir, gt_dir):
    """Convert the downloaded tif images and masks into one h5 file per image.

    Each h5 file contains:
        - 'raw': the image exactly as read from `<raw_dir>/<name>.tif`.
        - 'labels/semantic': uint8 binary mask (1 where the ground truth is > 0, else 0).
        - 'labels/instances': int64 connected component labels of the binary mask.

    Files that already exist in the output folder are not rewritten.

    Args:
        path: Root folder of the dataset; the h5 files go to `<path>/h5_data`.
        raw_dir: Folder that holds the raw tif images.
        gt_dir: Folder that holds the ground truth tif masks.

    Returns:
        The filepath to the directory with the h5 data.
    """
    import h5py
    from skimage.measure import label

    out_dir = os.path.join(path, "h5_data")
    os.makedirs(out_dir, exist_ok=True)

    for stem in _FILENAMES:
        out_file = os.path.join(out_dir, f"{stem}.h5")
        if not os.path.exists(out_file):
            image = imageio.imread(os.path.join(raw_dir, f"{stem}.tif"))
            mask = imageio.imread(os.path.join(gt_dir, f"{stem}_gt.tif"))

            # Everything above zero counts as cell; instances are its connected components.
            foreground = (mask > 0).astype("uint8")
            components = label(foreground).astype("int64")

            payload = {
                "raw": image,
                "labels/semantic": foreground,
                "labels/instances": components,
            }
            with h5py.File(out_file, "w") as f:
                for key, data in payload.items():
                    f.create_dataset(key, data=data, compression="gzip")

    return out_dir


def get_yeastcellseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the YeastCellSeg dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the directory with the h5 data.
    """
    output_dir = os.path.join(path, "h5_data")
    if os.path.exists(output_dir):
        # Nothing to do when the expected number of converted files is already there.
        n_converted = len(glob(os.path.join(output_dir, "*.h5")))
        if n_converted == len(_FILENAMES):
            return output_dir

    image_dir = os.path.join(path, "raw")
    mask_dir = os.path.join(path, "gt")
    for folder in (image_dir, mask_dir):
        os.makedirs(folder, exist_ok=True)

    for stem in _FILENAMES:
        # The image is handled before its mask; only missing files are requested.
        for folder, fname in ((image_dir, f"{stem}.tif"), (mask_dir, f"{stem}_gt.tif")):
            target = os.path.join(folder, fname)
            if os.path.exists(target):
                continue
            util.download_source(path=target, url=f"{BASE_URL}/{fname}", download=download, checksum=None)

    return _create_h5_data(path, image_dir, mask_dir)


def get_yeastcellseg_paths(
    path: Union[os.PathLike, str],
    download: bool = False,
) -> List[str]:
    """Get paths to the YeastCellSeg data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the h5 data.
    """
    from natsort import natsorted

    data_dir = get_yeastcellseg_data(path, download)
    volume_paths = natsorted(glob(os.path.join(data_dir, "*.h5")))

    n_expected, n_found = len(_FILENAMES), len(volume_paths)
    assert n_found == n_expected, f"Expected {n_expected} h5 files, found {n_found}"
    return volume_paths


def get_yeastcellseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    segmentation_type: Literal["instances", "semantic"] = "instances",
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the YeastCellSeg dataset for yeast cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        segmentation_type: The type of segmentation labels to use.
            One of 'instances' (connected component instance labels) or 'semantic' (binary cell mask).
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    valid_types = ("instances", "semantic")
    assert segmentation_type in valid_types, \
        f"'{segmentation_type}' is not valid. Choose from 'instances' or 'semantic'."

    # Raw data and labels live in the same h5 files, under different keys.
    file_paths = get_yeastcellseg_paths(path, download)

    # The second return value of the helper is not needed here.
    kwargs, _ = util.add_instance_label_transform(kwargs, add_binary_target=True, label_dtype=np.int64)
    kwargs = util.ensure_transforms(ndim=2, **kwargs)

    return torch_em.default_segmentation_dataset(
        raw_paths=file_paths,
        raw_key="raw",
        label_paths=file_paths,
        label_key=f"labels/{segmentation_type}",
        patch_shape=patch_shape,
        ndim=2,
        **kwargs
    )


def get_yeastcellseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    segmentation_type: Literal["instances", "semantic"] = "instances",
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the YeastCellSeg dataloader for yeast cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        segmentation_type: The type of segmentation labels to use.
            One of 'instances' (connected component instance labels) or 'semantic' (binary cell mask).
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the dataset arguments from the ones meant for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_yeastcellseg_dataset(
        path=path,
        patch_shape=patch_shape,
        segmentation_type=segmentation_type,
        download=download,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(dataset=ds, batch_size=batch_size, **dataloader_kwargs)
BASE_URL =
'https://zenodo.org/records/344879/files'
def
get_yeastcellseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_yeastcellseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the YeastCellSeg dataset and convert it to h5 files.

    If `<path>/h5_data` already holds as many `.h5` files as there are dataset
    entries, that folder is returned right away. Otherwise every missing raw
    image and ground truth mask is requested via `util.download_source`, and
    the h5 conversion is run afterwards.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.
            Forwarded unchanged to `util.download_source`.

    Returns:
        The filepath to the directory with the h5 data.
    """
    output_dir = os.path.join(path, "h5_data")
    if os.path.exists(output_dir):
        # Nothing to do when the expected number of converted files is already there.
        n_converted = len(glob(os.path.join(output_dir, "*.h5")))
        if n_converted == len(_FILENAMES):
            return output_dir

    image_dir = os.path.join(path, "raw")
    mask_dir = os.path.join(path, "gt")
    for folder in (image_dir, mask_dir):
        os.makedirs(folder, exist_ok=True)

    for stem in _FILENAMES:
        # The image is handled before its mask; only missing files are requested.
        for folder, fname in ((image_dir, f"{stem}.tif"), (mask_dir, f"{stem}_gt.tif")):
            target = os.path.join(folder, fname)
            if os.path.exists(target):
                continue
            util.download_source(path=target, url=f"{BASE_URL}/{fname}", download=download, checksum=None)

    return _create_h5_data(path, image_dir, mask_dir)
Download the YeastCellSeg dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the directory with the h5 data.
def
get_yeastcellseg_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
def get_yeastcellseg_paths(
    path: Union[os.PathLike, str],
    download: bool = False,
) -> List[str]:
    """Get paths to the YeastCellSeg data.

    Makes sure the data is available via `get_yeastcellseg_data` and then
    collects all `.h5` files from the returned folder in natural sort order.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the h5 data.

    Raises:
        AssertionError: If the number of h5 files found differs from the
            number of dataset entries.
    """
    from natsort import natsorted

    data_dir = get_yeastcellseg_data(path, download)
    volume_paths = natsorted(glob(os.path.join(data_dir, "*.h5")))

    n_expected, n_found = len(_FILENAMES), len(volume_paths)
    assert n_found == n_expected, f"Expected {n_expected} h5 files, found {n_found}"
    return volume_paths
Get paths to the YeastCellSeg data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the h5 data.
def
get_yeastcellseg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], segmentation_type: Literal['instances', 'semantic'] = 'instances', download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_yeastcellseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    segmentation_type: Literal["instances", "semantic"] = "instances",
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the YeastCellSeg dataset for yeast cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        segmentation_type: The type of segmentation labels to use.
            One of 'instances' (connected component instance labels) or 'semantic' (binary cell mask).
            Selects the h5 label key `labels/<segmentation_type>`.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        AssertionError: If `segmentation_type` is not one of the supported values.
    """
    valid_types = ("instances", "semantic")
    assert segmentation_type in valid_types, \
        f"'{segmentation_type}' is not valid. Choose from 'instances' or 'semantic'."

    # Raw data and labels live in the same h5 files, under different keys.
    file_paths = get_yeastcellseg_paths(path, download)

    # The second return value of the helper is not needed here.
    kwargs, _ = util.add_instance_label_transform(kwargs, add_binary_target=True, label_dtype=np.int64)
    kwargs = util.ensure_transforms(ndim=2, **kwargs)

    return torch_em.default_segmentation_dataset(
        raw_paths=file_paths,
        raw_key="raw",
        label_paths=file_paths,
        label_key=f"labels/{segmentation_type}",
        patch_shape=patch_shape,
        ndim=2,
        **kwargs
    )
Get the YeastCellSeg dataset for yeast cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- segmentation_type: The type of segmentation labels to use. One of 'instances' (connected component instance labels) or 'semantic' (binary cell mask).
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
def
get_yeastcellseg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], segmentation_type: Literal['instances', 'semantic'] = 'instances', download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_yeastcellseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    segmentation_type: Literal["instances", "semantic"] = "instances",
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the YeastCellSeg dataloader for yeast cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        segmentation_type: The type of segmentation labels to use.
            One of 'instances' (connected component instance labels) or 'semantic' (binary cell mask).
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
            They are divided between the two with `util.split_kwargs`.

    Returns:
        The DataLoader.
    """
    # Separate the dataset arguments from the ones meant for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_yeastcellseg_dataset(
        path=path,
        patch_shape=patch_shape,
        segmentation_type=segmentation_type,
        download=download,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(dataset=ds, batch_size=batch_size, **dataloader_kwargs)
Get the YeastCellSeg dataloader for yeast cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- segmentation_type: The type of segmentation labels to use. One of 'instances' (connected component instance labels) or 'semantic' (binary cell mask).
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.