torch_em.data.datasets.electron_microscopy.platynereis

Dataset for the segmentation of different structures in an EM volume of a Platynereis larva. Contains annotations for the segmentation of:

  • Cuticle
  • Cilia
  • Cells
  • Nuclei

This dataset is from the publication https://doi.org/10.1016/j.cell.2021.07.017. Please cite it if you use this dataset for a publication.

"""Dataset for the segmentation of different structures in an EM volume of a
Platynereis larva. Contains annotations for the segmentation of:
- Cuticle
- Cilia
- Cells
- Nuclei

This dataset is from the publication https://doi.org/10.1016/j.cell.2021.07.017.
Please cite it if you use this dataset for a publication.
"""
 11
 12import os
 13from glob import glob
 14from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
 15
 16import numpy as np
 17import torch_em
 18from torch.utils.data import Dataset, DataLoader
 19from .. import util
 20
 21URLS = {
 22    "cells": "https://zenodo.org/record/3675220/files/membrane.zip",
 23    "nuclei": "https://zenodo.org/record/3675220/files/nuclei.zip",
 24    "cilia": "https://zenodo.org/record/3675220/files/cilia.zip",
 25    "cuticle": "https://zenodo.org/record/3675220/files/cuticle.zip"
 26}
 27
 28CHECKSUMS = {
 29    "cells": "30eb50c39e7e9883e1cd96e0df689fac37a56abb11e8ed088907c94a5980d6a3",
 30    "nuclei": "a05033c5fbc6a3069479ac6595b0a430070f83f5281f5b5c8913125743cf5510",
 31    "cilia": "6d2b47f63d39a671789c02d8b66cad5e4cf30eb14cdb073da1a52b7defcc5e24",
 32    "cuticle": "464f75d30133e8864958049647fe3c2216ddf2d4327569738ad72d299c991843"
 33}
 34
 35
 36#
 37# TODO data-loader for more classes:
 38# - mitos
 39#
 40
 41
 42def _check_data(path, prefix, extension, n_files):
 43    if not os.path.exists(path):
 44        return False
 45    files = glob(os.path.join(path, f"{prefix}*{extension}"))
 46    return len(files) == n_files
 47
 48
 49def _get_paths_and_rois(sample_ids, n_files, template, rois):
 50    if sample_ids is None:
 51        sample_ids = list(range(1, n_files + 1))
 52    else:
 53        assert min(sample_ids) >= 1 and max(sample_ids) <= n_files
 54        sample_ids.sort()
 55    paths = [template % sample for sample in sample_ids]
 56    data_rois = [rois.get(sample, np.s_[:, :, :]) for sample in sample_ids]
 57    return paths, data_rois
 58
 59
 60def get_platy_data(path: Union[os.PathLike, str], name: str, download: bool) -> Tuple[str, int]:
 61    """Download the platynereis dataset.
 62
 63    Args:
 64        path: Filepath to a folder where the downloaded data will be saved.
 65        name: Name of the segmentation task. Available tasks: 'cuticle', 'cilia', 'cells' or 'nuclei'.
 66        download: Whether to download the data if it is not present.
 67
 68    Returns:
 69        The path to the folder where the data has been downloaded.
 70        The number of files downloaded.
 71    """
 72    data_root = os.path.join(path, name)
 73
 74    if name == "cuticle":
 75        ext, prefix, n_files = ".n5", "train_data_", 5
 76    elif name == "cilia":
 77        ext, prefix, n_files = ".h5", "train_data_cilia_", 3
 78    elif name == "cells":
 79        data_root = os.path.join(path, "membrane")
 80        ext, prefix, n_files = ".n5", "train_data_membrane_", 9
 81    elif name == "nuclei":
 82        ext, prefix, n_files = ".h5", "train_data_nuclei_", 12
 83    else:
 84        raise ValueError(f"Invalid name {name}. Expect one of 'cuticle', 'cilia', 'cell' or 'nuclei'.")
 85
 86    data_is_complete = _check_data(data_root, prefix, ext, n_files)
 87    if data_is_complete:
 88        return data_root, n_files
 89
 90    os.makedirs(path, exist_ok=True)
 91    url = URLS[name]
 92    checksum = CHECKSUMS[name]
 93
 94    zip_path = os.path.join(path, f"data-{name}.zip")
 95    util.download_source(zip_path, url, download=download, checksum=checksum)
 96    util.unzip(zip_path, path, remove=True)
 97
 98    return data_root, n_files
 99
100
def get_platynereis_cuticle_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    sample_ids: Optional[Sequence[int]] = None,
    download: bool = False,
    rois: Dict[int, Any] = {},
    **kwargs
) -> Dataset:
    """Get the dataset for cuticle segmentation in platynereis.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        sample_ids: The sample ids to use for the dataset. By default all samples are used.
        download: Whether to download the data if it is not present.
        rois: The regions of interest to use for the data blocks, keyed by sample id.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    data_root, n_files = get_platy_data(path, "cuticle", download)

    template = os.path.join(data_root, "train_data_%02i.n5")
    volume_paths, volume_rois = _get_paths_and_rois(sample_ids, n_files, template, rois)

    # Raw data and labels are read from the same files, under different keys.
    raw_key = "volumes/raw"
    label_key = "volumes/labels/segmentation"
    return torch_em.default_segmentation_dataset(
        volume_paths, raw_key, volume_paths, label_key, patch_shape, rois=volume_rois, **kwargs
    )
129
130
def get_platynereis_cuticle_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample_ids: Optional[Sequence[int]] = None,
    download: bool = False,
    rois: Dict[int, Any] = {},
    **kwargs
) -> DataLoader:
    """Get the dataloader for cuticle segmentation in platynereis.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_ids: The sample ids to use for the dataset. By default all samples are used.
        download: Whether to download the data if it is not present.
        rois: The regions of interest to use for the data blocks, keyed by sample id.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the ones for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_platynereis_cuticle_dataset(
        path, patch_shape, sample_ids=sample_ids, download=download, rois=rois, **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
161
162
def get_platynereis_cilia_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    sample_ids: Optional[Sequence[int]] = None,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    rois: Dict[int, Any] = {},
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the dataset for cilia segmentation in platynereis.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        sample_ids: The sample ids to use for the dataset. By default all samples are used.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        rois: The regions of interest to use for the data blocks, keyed by sample id.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    cilia_root, n_files = get_platy_data(path, "cilia", download)

    template = os.path.join(cilia_root, "train_data_cilia_%02i.h5")
    data_paths, data_rois = _get_paths_and_rois(sample_ids, n_files, template, rois)

    # Forward the per-sample rois to the dataset, like the cell and nuclei datasets do.
    # They used to be computed but never passed on, so `rois` had no effect here.
    kwargs = util.update_kwargs(kwargs, "rois", data_rois)
    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary,
    )

    # Raw data and labels are read from the same files, under different keys.
    raw_key = "volumes/raw"
    label_key = "volumes/labels/segmentation"
    return torch_em.default_segmentation_dataset(data_paths, raw_key, data_paths, label_key, patch_shape, **kwargs)
200
201
def get_platynereis_cilia_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample_ids: Optional[Sequence[int]] = None,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    rois: Dict[int, Any] = {},
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the dataloader for cilia segmentation in platynereis.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_ids: The sample ids to use for the dataset. By default all samples are used.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        rois: The regions of interest to use for the data blocks, keyed by sample id.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the ones for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_platynereis_cilia_dataset(
        path,
        patch_shape,
        sample_ids=sample_ids,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        rois=rois,
        download=download,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
240
241
def get_platynereis_cell_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    sample_ids: Optional[Sequence[int]] = None,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    rois: Dict[int, Any] = {},
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the dataset for cell segmentation in platynereis.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        sample_ids: The sample ids to use for the dataset. By default all samples are used.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        rois: The regions of interest to use for the data blocks, keyed by sample id.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # Raw data and labels are both read from the "s1" datasets of the same files.
    raw_key = "volumes/raw/s1"
    label_key = "volumes/labels/segmentation/s1"

    data_root, n_files = get_platy_data(path, "cells", download)
    volume_paths, volume_rois = _get_paths_and_rois(
        sample_ids, n_files, os.path.join(data_root, "train_data_membrane_%02i.n5"), rois
    )

    kwargs = util.update_kwargs(kwargs, "rois", volume_rois)
    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )
    return torch_em.default_segmentation_dataset(
        volume_paths, raw_key, volume_paths, label_key, patch_shape, **kwargs
    )
280
281
def get_platynereis_cell_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample_ids: Optional[Sequence[int]] = None,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    rois: Dict[int, Any] = {},
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the dataloader for cell segmentation in platynereis.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_ids: The sample ids to use for the dataset. By default all samples are used.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        rois: The regions of interest to use for the data blocks, keyed by sample id.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the ones for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_platynereis_cell_dataset(
        path,
        patch_shape,
        sample_ids=sample_ids,
        offsets=offsets,
        boundaries=boundaries,
        rois=rois,
        download=download,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
318
319
def get_platynereis_nuclei_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    sample_ids: Optional[Sequence[int]] = None,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    rois: Dict[int, Any] = {},
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the dataset for nucleus segmentation in platynereis.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        sample_ids: The sample ids to use for the dataset. By default all samples are used.
            The passed sequence is not modified.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        rois: The regions of interest to use for the data blocks, keyed by sample id.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    nuc_root, n_files = get_platy_data(path, "nuclei", download)

    # `_get_paths_and_rois` fills in the default ids, validates and sorts them, so this is
    # not repeated here. It gets a private list copy, so that the caller's sequence is never
    # sorted in place and tuples are supported as well.
    if sample_ids is not None:
        sample_ids = list(sample_ids)

    template = os.path.join(nuc_root, "train_data_nuclei_%02i.h5")
    data_paths, data_rois = _get_paths_and_rois(sample_ids, n_files, template, rois)

    kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)
    kwargs = util.update_kwargs(kwargs, "rois", data_rois)
    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary,
    )

    # Raw data and labels are read from the same files, under different keys.
    raw_key = "volumes/raw"
    label_key = "volumes/labels/nucleus_instance_labels"
    return torch_em.default_segmentation_dataset(data_paths, raw_key, data_paths, label_key, patch_shape, **kwargs)
366
367
def get_platynereis_nuclei_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample_ids: Optional[Sequence[int]] = None,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    rois: Dict[int, Any] = {},
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the dataloader for nucleus segmentation in platynereis.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_ids: The sample ids to use for the dataset. By default all samples are used.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        rois: The regions of interest to use for the data blocks, keyed by sample id.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the ones for the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    ds = get_platynereis_nuclei_dataset(
        path, patch_shape, sample_ids=sample_ids, rois=rois,
        offsets=offsets, boundaries=boundaries, binary=binary, download=download,
        **ds_kwargs,
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
# Download URL of the zipped data for each segmentation task.
URLS = {'cells': 'https://zenodo.org/record/3675220/files/membrane.zip', 'nuclei': 'https://zenodo.org/record/3675220/files/nuclei.zip', 'cilia': 'https://zenodo.org/record/3675220/files/cilia.zip', 'cuticle': 'https://zenodo.org/record/3675220/files/cuticle.zip'}
# Checksum of each archive in URLS, keyed by the same task names.
CHECKSUMS = {'cells': '30eb50c39e7e9883e1cd96e0df689fac37a56abb11e8ed088907c94a5980d6a3', 'nuclei': 'a05033c5fbc6a3069479ac6595b0a430070f83f5281f5b5c8913125743cf5510', 'cilia': '6d2b47f63d39a671789c02d8b66cad5e4cf30eb14cdb073da1a52b7defcc5e24', 'cuticle': '464f75d30133e8864958049647fe3c2216ddf2d4327569738ad72d299c991843'}
def get_platy_data( path: Union[os.PathLike, str], name: str, download: bool) -> Tuple[str, int]:
def get_platy_data(path: Union[os.PathLike, str], name: str, download: bool) -> Tuple[str, int]:
    """Download the platynereis dataset.

    If the expected number of files for the task is already present, nothing is downloaded.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: Name of the segmentation task. Available tasks: 'cuticle', 'cilia', 'cells' or 'nuclei'.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the folder where the data has been downloaded.
        The number of files expected for the task.

    Raises:
        ValueError: If `name` is not one of the available tasks.
    """
    # Per task: (folder name inside `path`, file extension, file prefix, number of files).
    # The data for the "cells" task is stored in a folder called "membrane".
    task_layouts = {
        "cuticle": ("cuticle", ".n5", "train_data_", 5),
        "cilia": ("cilia", ".h5", "train_data_cilia_", 3),
        "cells": ("membrane", ".n5", "train_data_membrane_", 9),
        "nuclei": ("nuclei", ".h5", "train_data_nuclei_", 12),
    }
    if name not in task_layouts:
        raise ValueError(f"Invalid name {name}. Expect one of 'cuticle', 'cilia', 'cells' or 'nuclei'.")

    folder, ext, prefix, n_files = task_layouts[name]
    data_root = os.path.join(path, folder)

    # Skip the download if all expected files are already there.
    if _check_data(data_root, prefix, ext, n_files):
        return data_root, n_files

    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, f"data-{name}.zip")
    util.download_source(zip_path, URLS[name], download=download, checksum=CHECKSUMS[name])
    util.unzip(zip_path, path, remove=True)

    return data_root, n_files

Download the platynereis dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • name: Name of the segmentation task. Available tasks: 'cuticle', 'cilia', 'cells' or 'nuclei'.
  • download: Whether to download the data if it is not present.
Returns:

The path to the folder where the data has been downloaded, and the number of files expected for the task.

def get_platynereis_cuticle_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], sample_ids: Optional[Sequence[int]] = None, download: bool = False, rois: Dict[int, Any] = {}, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_platynereis_cuticle_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    sample_ids: Optional[Sequence[int]] = None,
    download: bool = False,
    rois: Dict[int, Any] = {},
    **kwargs
) -> Dataset:
    """Get the dataset for cuticle segmentation in platynereis.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        sample_ids: The sample ids to use for the dataset. By default all samples are used.
        download: Whether to download the data if it is not present.
        rois: The regions of interest to use for the data blocks, keyed by sample id.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    data_root, n_files = get_platy_data(path, "cuticle", download)

    template = os.path.join(data_root, "train_data_%02i.n5")
    volume_paths, volume_rois = _get_paths_and_rois(sample_ids, n_files, template, rois)

    # Raw data and labels are read from the same files, under different keys.
    raw_key = "volumes/raw"
    label_key = "volumes/labels/segmentation"
    return torch_em.default_segmentation_dataset(
        volume_paths, raw_key, volume_paths, label_key, patch_shape, rois=volume_rois, **kwargs
    )

Get the dataset for cuticle segmentation in platynereis.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • sample_ids: The sample ids to use for the dataset
  • download: Whether to download the data if it is not present.
  • rois: The region of interests to use for the data blocks.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_platynereis_cuticle_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, sample_ids: Optional[Sequence[int]] = None, download: bool = False, rois: Dict[int, Any] = {}, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_platynereis_cuticle_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample_ids: Optional[Sequence[int]] = None,
    download: bool = False,
    rois: Dict[int, Any] = {},
    **kwargs
) -> DataLoader:
    """Get the dataloader for cuticle segmentation in platynereis.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_ids: The sample ids to use for the dataset. By default all samples are used.
        download: Whether to download the data if it is not present.
        rois: The regions of interest to use for the data blocks, keyed by sample id.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the ones for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_platynereis_cuticle_dataset(
        path, patch_shape, sample_ids=sample_ids, download=download, rois=rois, **dataset_kwargs
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)

Get the dataloader for cuticle segmentation in platynereis.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • sample_ids: The sample ids to use for the dataset
  • download: Whether to download the data if it is not present.
  • rois: The region of interests to use for the data blocks.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.

def get_platynereis_cilia_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], sample_ids: Optional[Sequence[int]] = None, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, rois: Dict[int, Any] = {}, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_platynereis_cilia_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    sample_ids: Optional[Sequence[int]] = None,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    rois: Dict[int, Any] = {},
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the dataset for cilia segmentation in platynereis.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        sample_ids: The sample ids to use for the dataset. By default all samples are used.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        rois: The regions of interest to use for the data blocks, keyed by sample id.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    cilia_root, n_files = get_platy_data(path, "cilia", download)

    template = os.path.join(cilia_root, "train_data_cilia_%02i.h5")
    data_paths, data_rois = _get_paths_and_rois(sample_ids, n_files, template, rois)

    # Forward the per-sample rois to the dataset, like the cell and nuclei datasets do.
    # They used to be computed but never passed on, so `rois` had no effect here.
    kwargs = util.update_kwargs(kwargs, "rois", data_rois)
    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary,
    )

    # Raw data and labels are read from the same files, under different keys.
    raw_key = "volumes/raw"
    label_key = "volumes/labels/segmentation"
    return torch_em.default_segmentation_dataset(data_paths, raw_key, data_paths, label_key, patch_shape, **kwargs)

Get the dataset for cilia segmentation in platynereis.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • sample_ids: The sample ids to use for the dataset
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • rois: The region of interests to use for the data blocks.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_platynereis_cilia_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, sample_ids: Optional[Sequence[int]] = None, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, rois: Dict[int, Any] = {}, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_platynereis_cilia_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample_ids: Optional[Sequence[int]] = None,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    rois: Dict[int, Any] = {},
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the dataloader for cilia segmentation in platynereis.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_ids: The sample ids to use for the dataset. By default all samples are used.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        rois: The regions of interest to use for the data blocks, keyed by sample id.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the ones for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_platynereis_cilia_dataset(
        path,
        patch_shape,
        sample_ids=sample_ids,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        rois=rois,
        download=download,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)

Get the dataloader for cilia segmentation in platynereis.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • sample_ids: The sample ids to use for the dataset
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to return a binary segmentation target.
  • rois: The region of interests to use for the data blocks.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.

def get_platynereis_cell_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], sample_ids: Optional[Sequence[int]] = None, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, rois: Dict[int, Any] = {}, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_platynereis_cell_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    sample_ids: Optional[Sequence[int]] = None,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    rois: Dict[int, Any] = {},
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the dataset for cell segmentation in platynereis.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        sample_ids: The sample ids to use for the dataset. By default all samples are used.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        rois: The regions of interest to use for the data blocks, keyed by sample id.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # Raw data and labels are both read from the "s1" datasets of the same files.
    raw_key = "volumes/raw/s1"
    label_key = "volumes/labels/segmentation/s1"

    data_root, n_files = get_platy_data(path, "cells", download)
    volume_paths, volume_rois = _get_paths_and_rois(
        sample_ids, n_files, os.path.join(data_root, "train_data_membrane_%02i.n5"), rois
    )

    kwargs = util.update_kwargs(kwargs, "rois", volume_rois)
    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=False, boundaries=boundaries, offsets=offsets
    )
    return torch_em.default_segmentation_dataset(
        volume_paths, raw_key, volume_paths, label_key, patch_shape, **kwargs
    )

Get the dataset for cell segmentation in platynereis.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • sample_ids: The sample ids to use for the dataset
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • rois: The region of interests to use for the data blocks.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_platynereis_cell_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, sample_ids: Optional[Sequence[int]] = None, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, rois: Dict[int, Any] = {}, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_platynereis_cell_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample_ids: Optional[Sequence[int]] = None,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    rois: Dict[int, Any] = {},
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the dataloader for cell segmentation in platynereis.

    This builds the dataset via `get_platynereis_cell_dataset` and wraps it
    with `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_ids: The sample ids to use for the dataset.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        rois: The regions of interest to use for the data blocks.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the ones forwarded to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_platynereis_cell_dataset(
        path,
        patch_shape,
        sample_ids,
        offsets=offsets,
        boundaries=boundaries,
        rois=rois,
        download=download,
        **dataset_kwargs,
    )

    loader = torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader

Get the dataloader for cell segmentation in platynereis.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • sample_ids: The sample ids to use for the dataset.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • rois: The regions of interest to use for the data blocks.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.

def get_platynereis_nuclei_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], sample_ids: Optional[Sequence[int]] = None, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, rois: Dict[int, Any] = {}, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_platynereis_nuclei_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    sample_ids: Optional[Sequence[int]] = None,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    rois: Dict[int, Any] = {},
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the dataset for nucleus segmentation in platynereis.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        sample_ids: The sample ids to use for the dataset. The ids must lie in the range
            from 1 to the number of available files. If None, all samples are used.
            Any sequence (list, tuple, ...) is accepted and it is not modified.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        rois: The regions of interest to use for the data blocks.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        AssertionError: If a sample id lies outside of the valid range.
    """
    nuc_root, n_files = get_platy_data(path, "nuclei", download)

    if sample_ids is None:
        sample_ids = list(range(1, n_files + 1))
    assert min(sample_ids) >= 1 and max(sample_ids) <= n_files, \
        f"Invalid sample_ids {list(sample_ids)}: expected values in the range [1, {n_files}]."
    # Sort into a new list instead of calling `sample_ids.sort()`: this leaves the
    # caller's sequence untouched and also works for tuples and other sequences
    # that do not provide an in-place sort.
    sample_ids = sorted(sample_ids)

    template = os.path.join(nuc_root, "train_data_nuclei_%02i.h5")
    data_paths, data_rois = _get_paths_and_rois(sample_ids, n_files, template, rois)

    kwargs = util.update_kwargs(kwargs, "is_seg_dataset", True)
    kwargs = util.update_kwargs(kwargs, "rois", data_rois)
    # Only the updated kwargs are needed here; the second return value is not used.
    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary,
    )

    # Raw data and labels are read from the same files, using different internal keys.
    raw_key = "volumes/raw"
    label_key = "volumes/labels/nucleus_instance_labels"
    return torch_em.default_segmentation_dataset(data_paths, raw_key, data_paths, label_key, patch_shape, **kwargs)

Get the dataset for nucleus segmentation in platynereis.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • sample_ids: The sample ids to use for the dataset.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to return a binary segmentation target.
  • rois: The regions of interest to use for the data blocks.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_platynereis_nuclei_loader( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], batch_size: int, sample_ids: Optional[Sequence[int]] = None, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, rois: Dict[int, Any] = {}, download: bool = False, **kwargs):
def get_platynereis_nuclei_loader(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    sample_ids: Optional[Sequence[int]] = None,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    rois: Dict[int, Any] = {},
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the dataloader for nucleus segmentation in platynereis.

    This builds the dataset via `get_platynereis_nuclei_dataset` and wraps it
    with `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        sample_ids: The sample ids to use for the dataset.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        rois: The regions of interest to use for the data blocks.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the ones forwarded to the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    ds = get_platynereis_nuclei_dataset(
        path, patch_shape, sample_ids=sample_ids, rois=rois,
        offsets=offsets, boundaries=boundaries, binary=binary, download=download,
        **ds_kwargs,
    )
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)

Get the dataloader for nucleus segmentation in platynereis.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • sample_ids: The sample ids to use for the dataset.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to return a binary segmentation target.
  • rois: The regions of interest to use for the data blocks.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.