torch_em.data.datasets.light_microscopy.phmamm

The PhMamm dataset contains 3D light-sheet microscopy volumes of Phallusia mammillata embryos with cell membrane segmentations.

The dataset is located at https://figshare.com/articles/dataset/3D_Mask_R-CNN_data/26973085. The original data is from the publication https://doi.org/10.1126/science.aar5663. Please cite it if you use this dataset in your research.

View Source

  1"""The PhMamm dataset contains 3D light-sheet microscopy volumes of Phallusia mammillata
  2embryos with cell membrane segmentations.
  3
  4The dataset is located at https://figshare.com/articles/dataset/3D_Mask_R-CNN_data/26973085.
  5The original data is from the publication https://doi.org/10.1126/science.aar5663.
  6Please cite it if you use this dataset in your research.
  7"""
  8
  9import os
 10from glob import glob
 11from natsort import natsorted
 12from typing import Union, Tuple, Optional, List
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
 21URLS = {
 22    "inputs": "https://ndownloader.figshare.com/files/51130115",
 23    "ground_truth": "https://ndownloader.figshare.com/files/51130100",
 24}
 25CHECKSUMS = {
 26    "inputs": None,
 27    "ground_truth": None,
 28}
 29
 30
 31def get_phmamm_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 32    """Download the PhMamm dataset.
 33
 34    Args:
 35        path: Filepath to a folder where the downloaded data will be saved.
 36        download: Whether to download the data if it is not present.
 37
 38    Returns:
 39        Filepath where the dataset is stored.
 40    """
 41    data_dir = os.path.join(path, "data")
 42    if os.path.exists(data_dir):
 43        return data_dir
 44
 45    os.makedirs(path, exist_ok=True)
 46
 47    inputs_zip = os.path.join(path, "Inputs.zip")
 48    util.download_source(inputs_zip, URLS["inputs"], download, checksum=CHECKSUMS["inputs"])
 49    util.unzip(inputs_zip, data_dir, remove=True)
 50
 51    gt_zip = os.path.join(path, "ASTEC_Ground_truth.zip")
 52    util.download_source(gt_zip, URLS["ground_truth"], download, checksum=CHECKSUMS["ground_truth"])
 53    util.unzip(gt_zip, data_dir, remove=True)
 54
 55    return data_dir
 56
 57
 58def get_phmamm_paths(
 59    path: Union[os.PathLike, str], download: bool = False,
 60) -> Tuple[List[str], List[str]]:
 61    """Get paths to the PhMamm data.
 62
 63    Args:
 64        path: Filepath to a folder where the downloaded data will be saved.
 65        download: Whether to download the data if it is not present.
 66
 67    Returns:
 68        List of filepaths for the image data.
 69        List of filepaths for the label data.
 70    """
 71    data_dir = get_phmamm_data(path, download)
 72
 73    raw_paths = natsorted(glob(os.path.join(data_dir, "Inputs", "*.tiff")))
 74    label_paths = natsorted(glob(os.path.join(data_dir, "ASTEC_Ground_truth", "*.tiff")))
 75    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0
 76
 77    return raw_paths, label_paths
 78
 79
 80def get_phmamm_dataset(
 81    path: Union[os.PathLike, str],
 82    patch_shape: Tuple[int, int, int],
 83    offsets: Optional[List[List[int]]] = None,
 84    boundaries: bool = False,
 85    binary: bool = False,
 86    download: bool = False,
 87    **kwargs
 88) -> Dataset:
 89    """Get the PhMamm dataset for cell segmentation in light-sheet microscopy.
 90
 91    Args:
 92        path: Filepath to a folder where the downloaded data will be saved.
 93        patch_shape: The patch shape to use for training.
 94        offsets: Offset values for affinity computation used as target.
 95        boundaries: Whether to compute boundaries as the target.
 96        binary: Whether to use a binary segmentation target.
 97        download: Whether to download the data if it is not present.
 98        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 99
100    Returns:
101        The segmentation dataset.
102    """
103    raw_paths, label_paths = get_phmamm_paths(path, download)
104
105    kwargs, _ = util.add_instance_label_transform(
106        kwargs, add_binary_target=True, offsets=offsets, boundaries=boundaries, binary=binary
107    )
108
109    return torch_em.default_segmentation_dataset(
110        raw_paths=raw_paths,
111        raw_key=None,
112        label_paths=label_paths,
113        label_key=None,
114        patch_shape=patch_shape,
115        **kwargs
116    )
117
118
def get_phmamm_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the PhMamm dataloader for cell segmentation in light-sheet microscopy.

    The extra keyword arguments are split with `util.split_kwargs`: one part is passed
    to `get_phmamm_dataset`, the remainder to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_phmamm_dataset(
        path=path, patch_shape=patch_shape, offsets=offsets, boundaries=boundaries,
        binary=binary, download=download, **dataset_kwargs
    )

    loader = torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
    return loader

URLS = {'inputs': 'https://ndownloader.figshare.com/files/51130115', 'ground_truth': 'https://ndownloader.figshare.com/files/51130100'}

CHECKSUMS = {'inputs': None, 'ground_truth': None}

def get_phmamm_data(path: Union[os.PathLike, str], download: bool = False) -> str: View Source

def get_phmamm_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the PhMamm dataset.

    The two archives ("Inputs.zip" and "ASTEC_Ground_truth.zip") are handled independently
    and extracted into `<path>/data`. An archive is only fetched if its extracted folder
    is not present yet, so a previously interrupted run is completed instead of being
    mistaken for a finished download just because `<path>/data` exists.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the dataset is stored.
    """
    data_dir = os.path.join(path, "data")

    # Folder expected inside `data_dir` after extraction -> key into URLS / CHECKSUMS.
    # The folder names are the ones `get_phmamm_paths` searches for the tiff files.
    archives = {"Inputs": "inputs", "ASTEC_Ground_truth": "ground_truth"}
    missing = [name for name in archives if not os.path.isdir(os.path.join(data_dir, name))]
    if not missing:
        return data_dir

    os.makedirs(path, exist_ok=True)

    for name in missing:
        key = archives[name]
        zip_path = os.path.join(path, f"{name}.zip")
        util.download_source(zip_path, URLS[key], download, checksum=CHECKSUMS[key])
        # The archive is deleted after extraction (remove=True).
        util.unzip(zip_path, data_dir, remove=True)

    return data_dir

Download the PhMamm dataset.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
download: Whether to download the data if it is not present.

Returns:

Filepath where the dataset is stored.

def get_phmamm_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]: View Source

def get_phmamm_paths(
    path: Union[os.PathLike, str], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the PhMamm data.

    Collects the `*.tiff` files from the "Inputs" and "ASTEC_Ground_truth" folders of the
    dataset. Both lists are returned in natural sort order; only their lengths are checked
    against each other.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If no images are found or the number of images and labels differs.
    """
    data_dir = get_phmamm_data(path, download)

    raw_dir = os.path.join(data_dir, "Inputs")
    label_dir = os.path.join(data_dir, "ASTEC_Ground_truth")
    raw_paths = natsorted(glob(os.path.join(raw_dir, "*.tiff")))
    label_paths = natsorted(glob(os.path.join(label_dir, "*.tiff")))

    # Same conditions as before, but with messages that say what is wrong and where.
    assert len(raw_paths) > 0, f"No '*.tiff' images found in '{raw_dir}'."
    assert len(raw_paths) == len(label_paths), (
        f"Found {len(raw_paths)} images in '{raw_dir}' but {len(label_paths)} labels in '{label_dir}'."
    )

    return raw_paths, label_paths

Get paths to the PhMamm data.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
download: Whether to download the data if it is not present.

Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_phmamm_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

 81def get_phmamm_dataset(
 82    path: Union[os.PathLike, str],
 83    patch_shape: Tuple[int, int, int],
 84    offsets: Optional[List[List[int]]] = None,
 85    boundaries: bool = False,
 86    binary: bool = False,
 87    download: bool = False,
 88    **kwargs
 89) -> Dataset:
 90    """Get the PhMamm dataset for cell segmentation in light-sheet microscopy.
 91
 92    Args:
 93        path: Filepath to a folder where the downloaded data will be saved.
 94        patch_shape: The patch shape to use for training.
 95        offsets: Offset values for affinity computation used as target.
 96        boundaries: Whether to compute boundaries as the target.
 97        binary: Whether to use a binary segmentation target.
 98        download: Whether to download the data if it is not present.
 99        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
100
101    Returns:
102        The segmentation dataset.
103    """
104    raw_paths, label_paths = get_phmamm_paths(path, download)
105
106    kwargs, _ = util.add_instance_label_transform(
107        kwargs, add_binary_target=True, offsets=offsets, boundaries=boundaries, binary=binary
108    )
109
110    return torch_em.default_segmentation_dataset(
111        raw_paths=raw_paths,
112        raw_key=None,
113        label_paths=label_paths,
114        label_key=None,
115        patch_shape=patch_shape,
116        **kwargs
117    )

Get the PhMamm dataset for cell segmentation in light-sheet microscopy.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
patch_shape: The patch shape to use for training.
offsets: Offset values for affinity computation used as target.
boundaries: Whether to compute boundaries as the target.
binary: Whether to use a binary segmentation target.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_phmamm_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int, int], offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_phmamm_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the PhMamm dataloader for cell segmentation in light-sheet microscopy.

    The extra keyword arguments are split with `util.split_kwargs`: one part is passed
    to `get_phmamm_dataset`, the remainder to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_phmamm_dataset(
        path=path, patch_shape=patch_shape, offsets=offsets, boundaries=boundaries,
        binary=binary, download=download, **dataset_kwargs
    )

    loader = torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
    return loader

Get the PhMamm dataloader for cell segmentation in light-sheet microscopy.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
batch_size: The batch size for training.
patch_shape: The patch shape to use for training.
offsets: Offset values for affinity computation used as target.
boundaries: Whether to compute boundaries as the target.
binary: Whether to use a binary segmentation target.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.