torch_em.data.datasets.light_microscopy.cellseg_3d

This dataset contains annotations for nucleus segmentation in 3D fluorescence microscopy data from mesoSPIM microscopy.

This dataset is from the publication https://doi.org/10.1101/2024.05.17.594691 . Please cite it if you use this dataset in your research.

  1"""This dataset contains annotation for nucleus segmentation in 3d fluorescence microscopy from mesoSPIM microscopy.
  2
  3This dataset is from the publication https://doi.org/10.1101/2024.05.17.594691 .
  4Please cite it if you use this dataset in your research.
  5"""
  6
  7import os
  8from glob import glob
  9from typing import Optional, Tuple, Union, List
 10
 11from torch.utils.data import Dataset, DataLoader
 12
 13import torch_em
 14
 15from .. import util
 16
 17
 # Download link for the zipped data (DATASET_WITH_GT.zip, hosted on Zenodo).
 18URL = "https://zenodo.org/records/11095111/files/DATASET_WITH_GT.zip?download=1"
 # Checksum handed to `util.download_source` together with URL (presumably the SHA256 of the zip -- TODO confirm).
 19CHECKSUM = "6d8e8d778e479000161fdfea70201a6ded95b3958a703f69def63e69bbddf9d6"
 20
 21
 22def get_cellseg_3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 23    """Download the CellSeg3d training data.
 24
 25    Args:
 26        path: Filepath to a folder where the downloaded data will be saved.
 27        download: Whether to download the data if it is not present.
 28
 29    Returns:
 30        The filepath to the training data.
 31    """
 32    url = URL
 33    checksum = CHECKSUM
 34
 35    data_path = os.path.join(path, "DATASET_WITH_GT")
 36    if os.path.exists(data_path):
 37        return data_path
 38
 39    os.makedirs(path, exist_ok=True)
 40    zip_path = os.path.join(path, "cellseg3d.zip")
 41    util.download_source(zip_path, url, download, checksum)
 42    util.unzip(zip_path, path, True)
 43
 44    return data_path
 45
 46
 47def get_cellseg_3d_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
 48    """Get paths to the CellSeg3d data.
 49
 50    Args:
 51        path: Filepath to a folder where the downloaded data will be saved.
 52        download: Whether to download the data if it is not present.
 53
 54    Returns:
 55        List of filepaths for the image data.
 56        List of filepaths for the label data.
 57    """
 58    data_root = get_cellseg_3d_data(path, download)
 59
 60    raw_paths = sorted(glob(os.path.join(data_root, "*.tif")))
 61    label_paths = sorted(glob(os.path.join(data_root, "labels", "*.tif")))
 62    assert len(raw_paths) == len(label_paths)
 63
 64    return raw_paths, label_paths
 65
 66
 67def get_cellseg_3d_dataset(
 68    path: Union[os.PathLike, str],
 69    patch_shape: Tuple[int, int],
 70    sample_ids: Optional[Tuple[int, ...]] = None,
 71    download: bool = False,
 72    **kwargs
 73) -> Dataset:
 74    """Get the CellSeg3d dataset for segmenting nuclei in 3d fluorescence microscopy.
 75
 76    Args:
 77        path: Filepath to a folder where the downloaded data will be saved.
 78        patch_shape: The patch shape to use for training.
 79        sample_ids: The volume ids to load.
 80        download: Whether to download the data if it is not present.
 81        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 82
 83    Returns:
 84       The segmentation dataset.
 85    """
 86    raw_paths, label_paths = get_cellseg_3d_paths(path, download)
 87
 88    if sample_ids is not None:
 89        assert all(sid < len(raw_paths) for sid in sample_ids)
 90        raw_paths = [raw_paths[i] for i in sample_ids]
 91        label_paths = [label_paths[i] for i in sample_ids]
 92
 93    return torch_em.default_segmentation_dataset(
 94        raw_paths=raw_paths,
 95        raw_key=None,
 96        label_paths=label_paths,
 97        label_key=None,
 98        patch_shape=patch_shape,
 99        **kwargs
100    )
101
102
def get_cellseg_3d_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    batch_size: int,
    sample_ids: Optional[Tuple[int, ...]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CellSeg3d dataloader for segmenting nuclei in 3d fluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training. The data is described as 3d volumes,
            so this is presumably a three-element shape (the annotation accepts any length).
        batch_size: The batch size for training.
        sample_ids: The volume ids to load.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the ones passed on to the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_cellseg_3d_dataset(path, patch_shape, sample_ids=sample_ids, download=download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **loader_kwargs)
# Download link for the zipped data (DATASET_WITH_GT.zip, hosted on Zenodo).
URL = 'https://zenodo.org/records/11095111/files/DATASET_WITH_GT.zip?download=1'
# Checksum handed to `util.download_source` together with URL (presumably the SHA256 of the zip -- TODO confirm).
CHECKSUM = '6d8e8d778e479000161fdfea70201a6ded95b3958a703f69def63e69bbddf9d6'
def get_cellseg_3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_cellseg_3d_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Make sure the CellSeg3d training data is available and return its location.

    If the folder `DATASET_WITH_GT` already exists inside `path` nothing else is done.
    Otherwise the zip archive is fetched with `util.download_source` and extracted into `path`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the training data.
    """
    data_path = os.path.join(path, "DATASET_WITH_GT")

    if not os.path.exists(data_path):
        os.makedirs(path, exist_ok=True)
        archive = os.path.join(path, "cellseg3d.zip")
        util.download_source(archive, URL, download, CHECKSUM)
        # The trailing `True` is forwarded positionally; presumably it asks
        # `util.unzip` to delete the archive after extraction -- TODO confirm.
        util.unzip(archive, path, True)

    return data_path

Download the CellSeg3d training data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the training data.

def get_cellseg_3d_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
def get_cellseg_3d_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Collect the image and label file paths of the CellSeg3d data.

    Images are the `*.tif` files directly inside the data folder, labels are the
    `*.tif` files inside its `labels` subfolder. Both lists are sorted.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_root = get_cellseg_3d_data(path, download)

    image_pattern = os.path.join(data_root, "*.tif")
    label_pattern = os.path.join(data_root, "labels", "*.tif")
    raw_paths, label_paths = (sorted(glob(pattern)) for pattern in (image_pattern, label_pattern))

    # Every image volume needs a matching label volume.
    assert len(raw_paths) == len(label_paths)

    return raw_paths, label_paths

Get paths to the CellSeg3d data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_cellseg_3d_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], sample_ids: Optional[Tuple[int, ...]] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 68def get_cellseg_3d_dataset(
 69    path: Union[os.PathLike, str],
 70    patch_shape: Tuple[int, int],
 71    sample_ids: Optional[Tuple[int, ...]] = None,
 72    download: bool = False,
 73    **kwargs
 74) -> Dataset:
 75    """Get the CellSeg3d dataset for segmenting nuclei in 3d fluorescence microscopy.
 76
 77    Args:
 78        path: Filepath to a folder where the downloaded data will be saved.
 79        patch_shape: The patch shape to use for training.
 80        sample_ids: The volume ids to load.
 81        download: Whether to download the data if it is not present.
 82        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 83
 84    Returns:
 85       The segmentation dataset.
 86    """
 87    raw_paths, label_paths = get_cellseg_3d_paths(path, download)
 88
 89    if sample_ids is not None:
 90        assert all(sid < len(raw_paths) for sid in sample_ids)
 91        raw_paths = [raw_paths[i] for i in sample_ids]
 92        label_paths = [label_paths[i] for i in sample_ids]
 93
 94    return torch_em.default_segmentation_dataset(
 95        raw_paths=raw_paths,
 96        raw_key=None,
 97        label_paths=label_paths,
 98        label_key=None,
 99        patch_shape=patch_shape,
100        **kwargs
101    )

Get the CellSeg3d dataset for segmenting nuclei in 3d fluorescence microscopy.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • sample_ids: The volume ids to load.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_cellseg_3d_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], batch_size: int, sample_ids: Optional[Tuple[int, ...]] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_cellseg_3d_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    batch_size: int,
    sample_ids: Optional[Tuple[int, ...]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CellSeg3d dataloader for segmenting nuclei in 3d fluorescence microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training. The data is described as 3d volumes,
            so this is presumably a three-element shape (the annotation accepts any length).
        batch_size: The batch size for training.
        sample_ids: The volume ids to load.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the ones passed on to the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_cellseg_3d_dataset(path, patch_shape, sample_ids=sample_ids, download=download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **loader_kwargs)

Get the CellSeg3d dataloader for segmenting nuclei in 3d fluorescence microscopy.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • sample_ids: The volume ids to load.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.