torch_em.data.datasets.light_microscopy.yeastsam

The YeastSAM dataset contains annotations for budding yeast cell instance segmentation in DIC (Differential Interference Contrast) microscopy images.

The dataset provides 44 images with corresponding instance segmentation masks.

The dataset is located at https://zenodo.org/records/17204942. This dataset is from the publication https://doi.org/10.1101/2025.09.17.676679. Please cite it if you use this dataset in your research.

  1"""The YeastSAM dataset contains annotations for budding yeast cell
  2instance segmentation in DIC (Differential Interference Contrast) microscopy images.
  3
  4The dataset provides 44 images with corresponding instance segmentation masks.
  5
  6The dataset is located at https://zenodo.org/records/17204942.
  7This dataset is from the publication https://doi.org/10.1101/2025.09.17.676679.
  8Please cite it if you use this dataset in your research.
  9"""
 10
 11import os
 12from typing import Union, Tuple
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
 21URL = "https://zenodo.org/records/17204942/files/CLB2.zip?download=1"  # Download link for the CLB2.zip archive of the Zenodo record.
 22CHECKSUM = "dc2f32a1ea79e2f65bc28ce79e41681d734b48d312f7fcf43956c4eae41af774"  # Expected checksum of CLB2.zip, passed to util.download_source.
 23
 24
 25def get_yeastsam_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 26    """Download the YeastSAM dataset.
 27
 28    Args:
 29        path: Filepath to a folder where the downloaded data will be saved.
 30        download: Whether to download the data if it is not present.
 31
 32    Returns:
 33        The filepath to the directory with the data.
 34    """
 35    data_dir = os.path.join(path, "DIC")
 36    if os.path.exists(data_dir):
 37        return path
 38
 39    os.makedirs(path, exist_ok=True)
 40    zip_path = os.path.join(path, "CLB2.zip")
 41    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 42    util.unzip(zip_path=zip_path, dst=path)
 43
 44    return path
 45
 46
 47def get_yeastsam_paths(
 48    path: Union[os.PathLike, str],
 49    download: bool = False,
 50) -> Tuple[str, str]:
 51    """Get paths to the YeastSAM data.
 52
 53    Args:
 54        path: Filepath to a folder where the downloaded data will be saved.
 55        download: Whether to download the data if it is not present.
 56
 57    Returns:
 58        Filepath to the folder where image data is stored.
 59        Filepath to the folder where label data is stored.
 60    """
 61    get_yeastsam_data(path, download)
 62
 63    image_folder = os.path.join(path, "DIC")
 64    label_folder = os.path.join(path, "DIC_mask")
 65
 66    return image_folder, label_folder
 67
 68
 69def get_yeastsam_dataset(
 70    path: Union[os.PathLike, str],
 71    patch_shape: Tuple[int, int],
 72    download: bool = False,
 73    **kwargs
 74) -> Dataset:
 75    """Get the YeastSAM dataset for yeast cell segmentation.
 76
 77    Args:
 78        path: Filepath to a folder where the downloaded data will be saved.
 79        patch_shape: The patch shape to use for training.
 80        download: Whether to download the data if it is not present.
 81        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 82
 83    Returns:
 84        The segmentation dataset.
 85    """
 86    image_folder, label_folder = get_yeastsam_paths(path, download)
 87
 88    kwargs, _ = util.add_instance_label_transform(
 89        kwargs, add_binary_target=True,
 90    )
 91    kwargs = util.ensure_transforms(ndim=2, **kwargs)
 92
 93    return torch_em.default_segmentation_dataset(
 94        raw_paths=image_folder,
 95        raw_key="*.tif",
 96        label_paths=label_folder,
 97        label_key="*.tif",
 98        patch_shape=patch_shape,
 99        is_seg_dataset=False,
100        ndim=2,
101        **kwargs
102    )
103
104
def get_yeastsam_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the YeastSAM dataloader for yeast cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those for the loader.
    dataset_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_yeastsam_dataset(path, patch_shape, download, **dataset_kwargs)
    return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
# Download link for the CLB2.zip archive of the Zenodo record.
URL = 'https://zenodo.org/records/17204942/files/CLB2.zip?download=1'
# Expected checksum of CLB2.zip, passed to util.download_source.
CHECKSUM = 'dc2f32a1ea79e2f65bc28ce79e41681d734b48d312f7fcf43956c4eae41af774'
def get_yeastsam_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_yeastsam_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the YeastSAM dataset.

    The data counts as present only if both the image folder ("DIC") and the
    label folder ("DIC_mask") exist under `path`. Otherwise the archive is
    downloaded (if `download` permits it) and extracted into `path`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the directory with the data.
    """
    # Images and labels are both read from these folders, so checking only the image
    # folder would accept a partially extracted archive and fail later on.
    expected_dirs = [os.path.join(path, name) for name in ("DIC", "DIC_mask")]
    if all(os.path.isdir(folder) for folder in expected_dirs):
        return path

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "CLB2.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return path

Download the YeastSAM dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the directory with the data.

def get_yeastsam_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[str, str]:
def get_yeastsam_paths(
    path: Union[os.PathLike, str],
    download: bool = False,
) -> Tuple[str, str]:
    """Get the image and label folders of the YeastSAM data.

    Calls `get_yeastsam_data` first, so the data is downloaded if it is missing
    and `download` is set.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath to the folder where image data is stored.
        Filepath to the folder where label data is stored.
    """
    get_yeastsam_data(path, download)
    return os.path.join(path, "DIC"), os.path.join(path, "DIC_mask")

Get paths to the YeastSAM data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

A tuple of two filepaths: the folder where the image data is stored, and the folder where the label data is stored.

def get_yeastsam_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 70def get_yeastsam_dataset(
 71    path: Union[os.PathLike, str],
 72    patch_shape: Tuple[int, int],
 73    download: bool = False,
 74    **kwargs
 75) -> Dataset:
 76    """Get the YeastSAM dataset for yeast cell segmentation.
 77
 78    Args:
 79        path: Filepath to a folder where the downloaded data will be saved.
 80        patch_shape: The patch shape to use for training.
 81        download: Whether to download the data if it is not present.
 82        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 83
 84    Returns:
 85        The segmentation dataset.
 86    """
 87    image_folder, label_folder = get_yeastsam_paths(path, download)
 88
 89    kwargs, _ = util.add_instance_label_transform(
 90        kwargs, add_binary_target=True,
 91    )
 92    kwargs = util.ensure_transforms(ndim=2, **kwargs)
 93
 94    return torch_em.default_segmentation_dataset(
 95        raw_paths=image_folder,
 96        raw_key="*.tif",
 97        label_paths=label_folder,
 98        label_key="*.tif",
 99        patch_shape=patch_shape,
100        is_seg_dataset=False,
101        ndim=2,
102        **kwargs
103    )

Get the YeastSAM dataset for yeast cell segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_yeastsam_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_yeastsam_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the YeastSAM dataloader for yeast cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those for the loader.
    dataset_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_yeastsam_dataset(path, patch_shape, download, **dataset_kwargs)
    return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)

Get the YeastSAM dataloader for yeast cell segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.