torch_em.data.datasets.light_microscopy.yeastsam
The YeastSAM dataset contains annotations for budding yeast cell instance segmentation in DIC (Differential Interference Contrast) microscopy images.
The dataset provides 44 images with corresponding instance segmentation masks.
The dataset is located at https://zenodo.org/records/17204942. This dataset is from the publication https://doi.org/10.1101/2025.09.17.676679. Please cite it if you use this dataset in your research.
"""The YeastSAM dataset contains annotations for budding yeast cell
instance segmentation in DIC (Differential Interference Contrast) microscopy images.

The dataset provides 44 images with corresponding instance segmentation masks.

The dataset is located at https://zenodo.org/records/17204942.
This dataset is from the publication https://doi.org/10.1101/2025.09.17.676679.
Please cite it if you use this dataset in your research.
"""

import os
from typing import Union, Tuple

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://zenodo.org/records/17204942/files/CLB2.zip?download=1"
CHECKSUM = "dc2f32a1ea79e2f65bc28ce79e41681d734b48d312f7fcf43956c4eae41af774"


def get_yeastsam_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the YeastSAM dataset.

    The data is treated as present only if both the image folder ("DIC") and the
    label folder ("DIC_mask") exist under `path`. Otherwise the archive "CLB2.zip"
    is requested through `util.download_source` (which receives the `download` flag)
    and extracted into `path` with `util.unzip`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the directory with the data.
    """
    # Require both folders: checking only "DIC" would accept a layout where the
    # masks are missing (e.g. after an incomplete extraction).
    expected_dirs = [os.path.join(path, name) for name in ("DIC", "DIC_mask")]
    if all(os.path.isdir(folder) for folder in expected_dirs):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "CLB2.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return path


def get_yeastsam_paths(
    path: Union[os.PathLike, str],
    download: bool = False,
) -> Tuple[str, str]:
    """Get paths to the YeastSAM data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath to the folder where image data is stored.
        Filepath to the folder where label data is stored.
    """
    get_yeastsam_data(path, download)

    image_folder = os.path.join(path, "DIC")
    label_folder = os.path.join(path, "DIC_mask")

    return image_folder, label_folder


def get_yeastsam_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the YeastSAM dataset for yeast cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_folder, label_folder = get_yeastsam_paths(path, download)

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True,
    )
    kwargs = util.ensure_transforms(ndim=2, **kwargs)

    # Images and labels are matched by the "*.tif" pattern inside their folders.
    return torch_em.default_segmentation_dataset(
        raw_paths=image_folder,
        raw_key="*.tif",
        label_paths=label_folder,
        label_key="*.tif",
        patch_shape=patch_shape,
        is_seg_dataset=False,
        ndim=2,
        **kwargs
    )


def get_yeastsam_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the YeastSAM dataloader for yeast cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the dataset keyword arguments from the ones meant for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_yeastsam_dataset(
        path=path,
        patch_shape=patch_shape,
        download=download,
        **ds_kwargs,
    )
    return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
def get_yeastsam_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the YeastSAM dataset.

    The data is treated as present only if both the image folder ("DIC") and the
    label folder ("DIC_mask") exist under `path`. Otherwise the archive "CLB2.zip"
    is requested through `util.download_source` (which receives the `download` flag)
    and extracted into `path` with `util.unzip`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the directory with the data.
    """
    # Require both folders: checking only "DIC" would accept a layout where the
    # masks are missing (e.g. after an incomplete extraction).
    expected_dirs = [os.path.join(path, name) for name in ("DIC", "DIC_mask")]
    if all(os.path.isdir(folder) for folder in expected_dirs):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "CLB2.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return path
Download the YeastSAM dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the directory with the data.
def get_yeastsam_paths(
    path: Union[os.PathLike, str],
    download: bool = False,
) -> Tuple[str, str]:
    """Return the image and label folders of the YeastSAM data.

    Calls `get_yeastsam_data` first, then builds the two folder paths
    ("DIC" and "DIC_mask") below `path`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath to the folder where image data is stored.
        Filepath to the folder where label data is stored.
    """
    # Make sure the data is in place before handing out the folder paths.
    get_yeastsam_data(path, download)

    images_dir, labels_dir = (os.path.join(path, sub) for sub in ("DIC", "DIC_mask"))
    return images_dir, labels_dir
Get paths to the YeastSAM data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
Filepath to the folder where image data is stored. Filepath to the folder where label data is stored.
def get_yeastsam_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the YeastSAM segmentation dataset for yeast cells.

    The keyword arguments are first passed through
    `util.add_instance_label_transform` (with `add_binary_target=True`) and
    `util.ensure_transforms` (with `ndim=2`) before the dataset is created.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_dir, mask_dir = get_yeastsam_paths(path, download)

    kwargs, _ = util.add_instance_label_transform(kwargs, add_binary_target=True)
    kwargs = util.ensure_transforms(ndim=2, **kwargs)

    # Images and labels are selected with the same "*.tif" pattern in their folders.
    tif_pattern = "*.tif"
    return torch_em.default_segmentation_dataset(
        raw_paths=raw_dir,
        raw_key=tif_pattern,
        label_paths=mask_dir,
        label_key=tif_pattern,
        patch_shape=patch_shape,
        ndim=2,
        is_seg_dataset=False,
        **kwargs
    )
Get the YeastSAM dataset for yeast cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_yeastsam_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the YeastSAM dataloader for yeast cell segmentation.

    The keyword arguments are divided by `util.split_kwargs`: one part goes to
    `get_yeastsam_dataset`, the other to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )

    yeastsam_dataset = get_yeastsam_dataset(
        path=path, patch_shape=patch_shape, download=download, **dataset_kwargs
    )

    return torch_em.get_data_loader(
        dataset=yeastsam_dataset, batch_size=batch_size, **dataloader_kwargs
    )
Get the YeastSAM dataloader for yeast cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.