torch_em.data.datasets.electron_microscopy.uro_cell

The UroCell dataset contains segmentation annotations for the following organelles:

  • Food Vacuoles
  • Golgi Apparatus
  • Lysosomes
  • Mitochondria

It contains several FIB-SEM volumes with annotations. This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2020.103693. Please cite it if you use this dataset for a publication.

  1"""The UroCell dataset contains segmentation annotations for the following organelles:
  2- Food Vacuoles
  3- Golgi Apparatus
  4- Lysosomes
  5- Mitochondria
  6
  7It contains several FIB-SEM volumes with annotations.
  8This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2020.103693.
  9Please cite it if you use this dataset for a publication.
 10"""
 11
 12import os
 13import warnings
 14from glob import glob
 15from shutil import rmtree
 16from typing import List, Optional, Union, Tuple
 17
 18import h5py
 19import torch_em
 20from torch.utils.data import Dataset, DataLoader
 21from .. import util
 22
 23
# Zip archive of the master branch of the UroCell GitHub repository.
URL = "https://github.com/MancaZerovnikMekuc/UroCell/archive/refs/heads/master.zip"
# Checksum handed to `util.download_source` for the downloaded archive.
# NOTE(review): 64 hex characters, presumably SHA-256 - confirm against `util.download_source`.
CHECKSUM = "a48cf31b06114d7def642742b4fcbe76103483c069122abe10f377d71a1acabc"
 26
 27
 28def get_urocell_data(path: Union[os.PathLike, str], download: bool) -> str:
 29    """Download the UroCell training data.
 30
 31    Args:
 32        path: Filepath to a folder where the downloaded data will be saved.
 33        download: Whether to download the data if it is not present.
 34
 35    Returns:
 36        The path to the downloaded data.
 37    """
 38    if os.path.exists(path):
 39        return path
 40
 41    try:
 42        import nibabel as nib
 43    except ImportError:
 44        raise RuntimeError("Please install the nibabel package.")
 45
 46    # Download and unzip the data.
 47    os.makedirs(path)
 48    tmp_path = os.path.join(path, "uro_cell.zip")
 49    util.download_source(tmp_path, URL, download, checksum=CHECKSUM)
 50    util.unzip(tmp_path, path, remove=True)
 51
 52    root = os.path.join(path, "UroCell-master")
 53
 54    files = glob(os.path.join(root, "data", "*.nii.gz"))
 55    files.sort()
 56    for data_path in files:
 57        fname = os.path.basename(data_path)
 58        data = nib.load(data_path).get_fdata()
 59
 60        out_path = os.path.join(path, fname.replace("nii.gz", "h5"))
 61        with h5py.File(out_path, "w") as f:
 62            f.create_dataset("raw", data=data, compression="gzip")
 63
 64            # Check if we have any of the organelle labels for this volume
 65            # and also copy them if yes.
 66            fv_path = os.path.join(root, "fv", "instance", fname)
 67            if os.path.exists(fv_path):
 68                fv = nib.load(fv_path).get_fdata().astype("uint32")
 69                assert fv.shape == data.shape
 70                f.create_dataset("labels/fv", data=fv, compression="gzip")
 71
 72            golgi_path = os.path.join(root, "golgi", "precise", fname)
 73            if os.path.exists(golgi_path):
 74                golgi = nib.load(golgi_path).get_fdata().astype("uint32")
 75                assert golgi.shape == data.shape
 76                f.create_dataset("labels/golgi", data=golgi, compression="gzip")
 77
 78            lyso_path = os.path.join(root, "lyso", "instance", fname)
 79            if os.path.exists(lyso_path):
 80                lyso = nib.load(lyso_path).get_fdata().astype("uint32")
 81                assert lyso.shape == data.shape
 82                f.create_dataset("labels/lyso", data=lyso, compression="gzip")
 83
 84            mito_path = os.path.join(root, "mito", "instance", fname)
 85            if os.path.exists(mito_path):
 86                mito = nib.load(mito_path).get_fdata().astype("uint32")
 87                assert mito.shape == data.shape
 88                f.create_dataset("labels/mito", data=mito, compression="gzip")
 89
 90    # Clean Up.
 91    rmtree(root)
 92    return path
 93
 94
 95def _get_paths(path, target):
 96    label_key = f"labels/{target}"
 97    all_paths = glob(os.path.join(path, "*.h5"))
 98    all_paths.sort()
 99    paths = [path for path in all_paths if label_key in h5py.File(path, "r")]
100    return paths, label_key
101
102
def get_uro_cell_dataset(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> Dataset:
    """Create the UroCell dataset for segmenting one organelle in FIB-SEM volumes.

    At most one of `offsets`, `boundaries` and `binary` may be set; the choice determines
    which label transform is added to the dataset arguments.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
       The segmentation dataset.
    """
    assert target in ("fv", "golgi", "lyso", "mito")
    get_urocell_data(path, download)
    volume_paths, label_key = _get_paths(path, target)

    assert sum((offsets is not None, boundaries, binary)) <= 1, f"{offsets}, {boundaries}, {binary}"

    # These two targets trigger the "no instance labels" warning below.
    warn_no_instances = target in ("lyso", "golgi")

    if offsets is not None:
        if warn_no_instances:
            warnings.warn(
                f"{target} does not have instance labels, affinities will be computed based on binary segmentation."
            )
        # A binary target channel is added for foreground / background segmentation.
        affinity_transform = torch_em.transform.label.AffinityTransform(
            offsets=offsets, ignore_label=None, add_binary_target=True, add_mask=True
        )
        kwargs = util.update_kwargs(
            kwargs,
            "label_transform2",
            affinity_transform,
            msg="Offsets are passed, but 'label_transform2' is in the kwargs. It will be over-ridden.",
        )
    elif boundaries:
        if warn_no_instances:
            warnings.warn(
                f"{target} does not have instance labels, boundaries will be computed based on binary segmentation."
            )
        boundary_transform = torch_em.transform.label.BoundaryTransform(add_binary_target=True)
        kwargs = util.update_kwargs(
            kwargs,
            "label_transform",
            boundary_transform,
            msg="Boundaries is set to true, but 'label_transform' is in the kwargs. It will be over-ridden.",
        )
    elif binary:
        kwargs = util.update_kwargs(
            kwargs,
            "label_transform",
            torch_em.transform.label.labels_to_binary,
            msg="Binary is set to true, but 'label_transform' is in the kwargs. It will be over-ridden.",
        )

    # Raw data and labels live in the same HDF5 files, so the same paths are passed twice.
    return torch_em.default_segmentation_dataset(
        volume_paths, "raw", volume_paths, label_key, patch_shape, is_seg_dataset=True, **kwargs
    )
163
164
def get_uro_cell_loader(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Create the UroCell dataloader for segmenting one organelle in FIB-SEM volumes.

    The keyword arguments are split into those for the dataset and those for the loader,
    the dataset is built with `get_uro_cell_dataset` and then wrapped in a data loader.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
       The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_uro_cell_dataset(
        path,
        target,
        patch_shape,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
# Zip archive of the master branch of the UroCell GitHub repository.
URL = 'https://github.com/MancaZerovnikMekuc/UroCell/archive/refs/heads/master.zip'
# Checksum handed to `util.download_source` for the downloaded archive.
# NOTE(review): 64 hex characters, presumably SHA-256 - confirm against `util.download_source`.
CHECKSUM = 'a48cf31b06114d7def642742b4fcbe76103483c069122abe10f377d71a1acabc'
def get_urocell_data(path: Union[os.PathLike, str], download: bool) -> str:
def get_urocell_data(path: Union[os.PathLike, str], download: bool) -> str:
    """Download the UroCell training data and convert it to HDF5.

    Every volume `<name>.nii.gz` in the `data` folder of the downloaded repository is written
    to `<path>/<name>.h5` with the image data stored under the key `raw`. If a label volume with
    the same file name exists for an organelle, it is stored as `uint32` under the key
    `labels/fv`, `labels/golgi`, `labels/lyso` or `labels/mito`.

    If `path` already exists it is returned as is, without downloading or checking its content.
    If the download or conversion fails, the folder created by this call is removed again,
    so that a later call can retry instead of returning an incomplete folder.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.

    Raises:
        RuntimeError: If the nibabel package is not installed.
    """
    if os.path.exists(path):
        return path

    try:
        import nibabel as nib
    except ImportError as e:
        raise RuntimeError("Please install the nibabel package.") from e

    # Organelle name and the sub-folder of the repository its label volumes are read from.
    label_folders = (
        ("fv", "instance"),
        ("golgi", "precise"),
        ("lyso", "instance"),
        ("mito", "instance"),
    )

    os.makedirs(path)
    try:
        # Download and unzip the data.
        tmp_path = os.path.join(path, "uro_cell.zip")
        util.download_source(tmp_path, URL, download, checksum=CHECKSUM)
        util.unzip(tmp_path, path, remove=True)

        root = os.path.join(path, "UroCell-master")
        for data_path in sorted(glob(os.path.join(root, "data", "*.nii.gz"))):
            fname = os.path.basename(data_path)
            data = nib.load(data_path).get_fdata()

            out_path = os.path.join(path, fname.replace("nii.gz", "h5"))
            with h5py.File(out_path, "w") as f:
                f.create_dataset("raw", data=data, compression="gzip")

                # Copy the labels of every organelle that is annotated for this volume.
                for organelle, folder in label_folders:
                    label_path = os.path.join(root, organelle, folder, fname)
                    if not os.path.exists(label_path):
                        continue
                    labels = nib.load(label_path).get_fdata().astype("uint32")
                    assert labels.shape == data.shape, f"{label_path}: {labels.shape} != {data.shape}"
                    f.create_dataset(f"labels/{organelle}", data=labels, compression="gzip")

        # Clean Up.
        rmtree(root)
    except BaseException:
        # `path` did not exist before this call, so everything inside it is partial output.
        # Leaving it behind would make the early return above skip the download forever.
        rmtree(path, ignore_errors=True)
        raise

    return path

Download the UroCell training data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The path to the downloaded data.

def get_uro_cell_dataset( path: Union[os.PathLike, str], target: str, patch_shape: Tuple[int, int, int], download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_uro_cell_dataset(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> Dataset:
    """Create the UroCell dataset for segmenting one organelle in FIB-SEM volumes.

    At most one of `offsets`, `boundaries` and `binary` may be set; the choice determines
    which label transform is added to the dataset arguments.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
       The segmentation dataset.
    """
    assert target in ("fv", "golgi", "lyso", "mito")
    get_urocell_data(path, download)
    volume_paths, label_key = _get_paths(path, target)

    assert sum((offsets is not None, boundaries, binary)) <= 1, f"{offsets}, {boundaries}, {binary}"

    # These two targets trigger the "no instance labels" warning below.
    warn_no_instances = target in ("lyso", "golgi")

    if offsets is not None:
        if warn_no_instances:
            warnings.warn(
                f"{target} does not have instance labels, affinities will be computed based on binary segmentation."
            )
        # A binary target channel is added for foreground / background segmentation.
        affinity_transform = torch_em.transform.label.AffinityTransform(
            offsets=offsets, ignore_label=None, add_binary_target=True, add_mask=True
        )
        kwargs = util.update_kwargs(
            kwargs,
            "label_transform2",
            affinity_transform,
            msg="Offsets are passed, but 'label_transform2' is in the kwargs. It will be over-ridden.",
        )
    elif boundaries:
        if warn_no_instances:
            warnings.warn(
                f"{target} does not have instance labels, boundaries will be computed based on binary segmentation."
            )
        boundary_transform = torch_em.transform.label.BoundaryTransform(add_binary_target=True)
        kwargs = util.update_kwargs(
            kwargs,
            "label_transform",
            boundary_transform,
            msg="Boundaries is set to true, but 'label_transform' is in the kwargs. It will be over-ridden.",
        )
    elif binary:
        kwargs = util.update_kwargs(
            kwargs,
            "label_transform",
            torch_em.transform.label.labels_to_binary,
            msg="Binary is set to true, but 'label_transform' is in the kwargs. It will be over-ridden.",
        )

    # Raw data and labels live in the same HDF5 files, so the same paths are passed twice.
    return torch_em.default_segmentation_dataset(
        volume_paths, "raw", volume_paths, label_key, patch_shape, is_seg_dataset=True, **kwargs
    )

Get the UroCell dataset for organelle segmentation in FIB-SEM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • target: The segmentation target, corresponding to the organelle to segment. Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to return a binary segmentation target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_uro_cell_loader( path: Union[os.PathLike, str], target: str, patch_shape: Tuple[int, int, int], batch_size: int, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_uro_cell_loader(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Create the UroCell dataloader for segmenting one organelle in FIB-SEM volumes.

    The keyword arguments are split into those for the dataset and those for the loader,
    the dataset is built with `get_uro_cell_dataset` and then wrapped in a data loader.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
       The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_uro_cell_dataset(
        path,
        target,
        patch_shape,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)

Get the UroCell dataloader for organelle segmentation in FIB-SEM.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • target: The segmentation target, corresponding to the organelle to segment. Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • download: Whether to download the data if it is not present.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to return a binary segmentation target.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.