torch_em.data.datasets.electron_microscopy.uro_cell

The UroCell dataset contains segmentation annotations for the following organelles:

- Food Vacuoles
- Golgi Apparatus
- Lysosomes
- Mitochondria

It contains several FIB-SEM volumes with annotations.

This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2020.103693. Please cite it if you use this dataset for a publication.

View Source

  1"""The UroCell dataset contains segmentation annotations for the following organelles:
  2- Food Vacuoles
  3- Golgi Apparatus
  4- Lysosomes
  5- Mitochondria
  6It contains several FIB-SEM volumes with annotations.
  7
  8This dataset is from the publication https://doi.org/10.1016/j.compbiomed.2020.103693.
  9Please cite it if you use this dataset for a publication.
 10"""
 11
 12import os
 13import warnings
 14from glob import glob
 15from shutil import rmtree
 16from typing import List, Optional, Union, Tuple
 17
 18from torch.utils.data import Dataset, DataLoader
 19
 20import torch_em
 21
 22from .. import util
 23
 24
# Zip archive of the master branch of the UroCell GitHub repository.
URL = "https://github.com/MancaZerovnikMekuc/UroCell/archive/refs/heads/master.zip"
# Checksum handed to `util.download_source` together with URL.
# NOTE(review): presumably a SHA-256 digest of the archive - confirm against `util.download_source`.
CHECKSUM = "a48cf31b06114d7def642742b4fcbe76103483c069122abe10f377d71a1acabc"
 27
 28
 29def get_uro_cell_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 30    """Download the UroCell training data.
 31
 32    Args:
 33        path: Filepath to a folder where the downloaded data will be saved.
 34        download: Whether to download the data if it is not present.
 35
 36    Returns:
 37        The path to the downloaded data.
 38    """
 39    import h5py
 40
 41    if os.path.exists(path):
 42        return path
 43
 44    try:
 45        import nibabel as nib
 46    except ImportError:
 47        raise RuntimeError("Please install the nibabel package.")
 48
 49    # Download and unzip the data.
 50    os.makedirs(path)
 51    tmp_path = os.path.join(path, "uro_cell.zip")
 52    util.download_source(tmp_path, URL, download, checksum=CHECKSUM)
 53    util.unzip(tmp_path, path, remove=True)
 54
 55    root = os.path.join(path, "UroCell-master")
 56
 57    files = glob(os.path.join(root, "data", "*.nii.gz"))
 58    files.sort()
 59    for data_path in files:
 60        fname = os.path.basename(data_path)
 61        data = nib.load(data_path).get_fdata()
 62
 63        out_path = os.path.join(path, fname.replace("nii.gz", "h5"))
 64        with h5py.File(out_path, "w") as f:
 65            f.create_dataset("raw", data=data, compression="gzip")
 66
 67            # Check if we have any of the organelle labels for this volume
 68            # and also copy them if yes.
 69            fv_path = os.path.join(root, "fv", "instance", fname)
 70            if os.path.exists(fv_path):
 71                fv = nib.load(fv_path).get_fdata().astype("uint32")
 72                assert fv.shape == data.shape
 73                f.create_dataset("labels/fv", data=fv, compression="gzip")
 74
 75            golgi_path = os.path.join(root, "golgi", "precise", fname)
 76            if os.path.exists(golgi_path):
 77                golgi = nib.load(golgi_path).get_fdata().astype("uint32")
 78                assert golgi.shape == data.shape
 79                f.create_dataset("labels/golgi", data=golgi, compression="gzip")
 80
 81            lyso_path = os.path.join(root, "lyso", "instance", fname)
 82            if os.path.exists(lyso_path):
 83                lyso = nib.load(lyso_path).get_fdata().astype("uint32")
 84                assert lyso.shape == data.shape
 85                f.create_dataset("labels/lyso", data=lyso, compression="gzip")
 86
 87            mito_path = os.path.join(root, "mito", "instance", fname)
 88            if os.path.exists(mito_path):
 89                mito = nib.load(mito_path).get_fdata().astype("uint32")
 90                assert mito.shape == data.shape
 91                f.create_dataset("labels/mito", data=mito, compression="gzip")
 92
 93    # Clean Up.
 94    rmtree(root)
 95    return path
 96
 97
 98def get_uro_cell_paths(
 99    path: Union[os.PathLike], target: str, download: bool = False, return_label_key: bool = False,
100) -> List[str]:
101    """Get paths to the UroCell data.
102
103    Args:
104        path: Filepath to a folder where the downloaded data will be saved.
105        target: The segmentation target, corresponding to the organelle to segment.
106            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
107        download: Whether to download the data if it is not present.
108        return_label_key: Whether to return the label key.
109
110    Returns:
111        List of filepaths to the stored data.
112    """
113    import h5py
114
115    get_uro_cell_data(path, download)
116
117    label_key = f"labels/{target}"
118    all_paths = glob(os.path.join(path, "*.h5"))
119    all_paths.sort()
120    paths = [path for path in all_paths if label_key in h5py.File(path, "r")]
121
122    if return_label_key:
123        return paths, label_key
124    else:
125        return paths
126
127
def get_uro_cell_dataset(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> Dataset:
    """Create the UroCell dataset for segmenting one organelle in FIB-SEM volumes.

    At most one of `offsets`, `boundaries` and `binary` may be set. The selected
    option installs the corresponding label transform in `kwargs` before the
    dataset is built.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
       The segmentation dataset.
    """
    assert target in ("fv", "golgi", "lyso", "mito")

    volume_paths, label_key = get_uro_cell_paths(path, target, download, return_label_key=True)

    use_affinities = offsets is not None
    assert sum((use_affinities, boundaries, binary)) <= 1, f"{offsets}, {boundaries}, {binary}"

    # These targets trigger the "does not have instance labels" warnings below.
    without_instances = target in ("lyso", "golgi")

    if use_affinities:
        if without_instances:
            warnings.warn(
                f"{target} does not have instance labels, affinities will be computed based on binary segmentation."
            )
        # The affinity target also gets a binary foreground/background channel.
        affinity_transform = torch_em.transform.label.AffinityTransform(
            offsets=offsets, ignore_label=None, add_binary_target=True, add_mask=True
        )
        kwargs = util.update_kwargs(
            kwargs,
            'label_transform2',
            affinity_transform,
            msg="Offsets are passed, but 'label_transform2' is in the kwargs. It will be over-ridden.",
        )
    elif boundaries:
        if without_instances:
            warnings.warn(
                f"{target} does not have instance labels, boundaries will be computed based on binary segmentation."
            )
        boundary_transform = torch_em.transform.label.BoundaryTransform(add_binary_target=True)
        kwargs = util.update_kwargs(
            kwargs,
            'label_transform',
            boundary_transform,
            msg="Boundaries is set to true, but 'label_transform' is in the kwargs. It will be over-ridden.",
        )
    elif binary:
        kwargs = util.update_kwargs(
            kwargs,
            'label_transform',
            torch_em.transform.label.labels_to_binary,
            msg="Binary is set to true, but 'label_transform' is in the kwargs. It will be over-ridden.",
        )

    # Raw data and labels live in the same HDF5 files, so both use the same paths.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=volume_paths,
        raw_key="raw",
        label_paths=volume_paths,
        label_key=label_key,
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )
    return dataset
192
193
def get_uro_cell_loader(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Create the UroCell dataloader for segmenting one organelle in FIB-SEM volumes.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
       The DataLoader.
    """
    # Separate the keyword arguments of the dataset from the remaining ones for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_uro_cell_dataset(
        path,
        target,
        patch_shape,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs
    )
    loader = torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader

URL = 'https://github.com/MancaZerovnikMekuc/UroCell/archive/refs/heads/master.zip'

CHECKSUM = 'a48cf31b06114d7def642742b4fcbe76103483c069122abe10f377d71a1acabc'

def get_uro_cell_data(path: Union[os.PathLike, str], download: bool = False) -> str: View Source

def get_uro_cell_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the UroCell training data and convert it to HDF5.

    If `path` already exists it is returned unchanged and nothing else happens.
    Otherwise the folder is created, the repository archive is fetched via
    `util.download_source` and unpacked, and every `data/*.nii.gz` volume is
    written to `<path>/<name>.h5` under the key "raw". Annotations found for a
    volume are stored in the same file under "labels/<organelle>". The unpacked
    repository folder is removed afterwards.

    If any step fails, the freshly created `path` folder is removed again so that
    a later call does not mistake a partial download for prepared data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The path to the downloaded data.

    Raises:
        RuntimeError: If the nibabel package is not installed.
    """
    # An existing folder is treated as already prepared data.
    if os.path.exists(path):
        return path

    import h5py

    try:
        import nibabel as nib
    except ImportError as e:
        raise RuntimeError("Please install the nibabel package.") from e

    # Organelle name -> sub-folder of the repository that holds its annotations.
    label_folders = {"fv": "instance", "golgi": "precise", "lyso": "instance", "mito": "instance"}

    os.makedirs(path)
    finished = False
    try:
        # Download and unzip the data.
        tmp_path = os.path.join(path, "uro_cell.zip")
        util.download_source(tmp_path, URL, download, checksum=CHECKSUM)
        util.unzip(tmp_path, path, remove=True)

        root = os.path.join(path, "UroCell-master")

        for data_path in sorted(glob(os.path.join(root, "data", "*.nii.gz"))):
            fname = os.path.basename(data_path)
            data = nib.load(data_path).get_fdata()

            out_path = os.path.join(path, fname.replace("nii.gz", "h5"))
            with h5py.File(out_path, "w") as f:
                f.create_dataset("raw", data=data, compression="gzip")

                # Check if we have any of the organelle labels for this volume
                # and also copy them if yes.
                for organelle, folder in label_folders.items():
                    label_path = os.path.join(root, organelle, folder, fname)
                    if not os.path.exists(label_path):
                        continue
                    labels = nib.load(label_path).get_fdata().astype("uint32")
                    assert labels.shape == data.shape
                    f.create_dataset(f"labels/{organelle}", data=labels, compression="gzip")

        # Clean Up.
        rmtree(root)
        finished = True
    finally:
        # The folder did not exist before this call, so removing it only
        # discards what was created here.
        if not finished:
            rmtree(path, ignore_errors=True)

    return path

Download the UroCell training data.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
download: Whether to download the data if it is not present.

Returns:

The path to the downloaded data.

def get_uro_cell_paths( path: os.PathLike, target: str, download: bool = False, return_label_key: bool = False) -> List[str]: View Source

 99def get_uro_cell_paths(
100    path: Union[os.PathLike], target: str, download: bool = False, return_label_key: bool = False,
101) -> List[str]:
102    """Get paths to the UroCell data.
103
104    Args:
105        path: Filepath to a folder where the downloaded data will be saved.
106        target: The segmentation target, corresponding to the organelle to segment.
107            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
108        download: Whether to download the data if it is not present.
109        return_label_key: Whether to return the label key.
110
111    Returns:
112        List of filepaths to the stored data.
113    """
114    import h5py
115
116    get_uro_cell_data(path, download)
117
118    label_key = f"labels/{target}"
119    all_paths = glob(os.path.join(path, "*.h5"))
120    all_paths.sort()
121    paths = [path for path in all_paths if label_key in h5py.File(path, "r")]
122
123    if return_label_key:
124        return paths, label_key
125    else:
126        return paths

Get paths to the UroCell data.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
target: The segmentation target, corresponding to the organelle to segment. Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
download: Whether to download the data if it is not present.
return_label_key: Whether to return the label key.

Returns:

List of filepaths to the stored data.

def get_uro_cell_dataset( path: Union[os.PathLike, str], target: str, patch_shape: Tuple[int, int, int], download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

def get_uro_cell_dataset(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> Dataset:
    """Create the UroCell dataset for segmenting one organelle in FIB-SEM volumes.

    At most one of `offsets`, `boundaries` and `binary` may be set. The selected
    option installs the corresponding label transform in `kwargs` before the
    dataset is built.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
       The segmentation dataset.
    """
    assert target in ("fv", "golgi", "lyso", "mito")

    volume_paths, label_key = get_uro_cell_paths(path, target, download, return_label_key=True)

    use_affinities = offsets is not None
    assert sum((use_affinities, boundaries, binary)) <= 1, f"{offsets}, {boundaries}, {binary}"

    # These targets trigger the "does not have instance labels" warnings below.
    without_instances = target in ("lyso", "golgi")

    if use_affinities:
        if without_instances:
            warnings.warn(
                f"{target} does not have instance labels, affinities will be computed based on binary segmentation."
            )
        # The affinity target also gets a binary foreground/background channel.
        affinity_transform = torch_em.transform.label.AffinityTransform(
            offsets=offsets, ignore_label=None, add_binary_target=True, add_mask=True
        )
        kwargs = util.update_kwargs(
            kwargs,
            'label_transform2',
            affinity_transform,
            msg="Offsets are passed, but 'label_transform2' is in the kwargs. It will be over-ridden.",
        )
    elif boundaries:
        if without_instances:
            warnings.warn(
                f"{target} does not have instance labels, boundaries will be computed based on binary segmentation."
            )
        boundary_transform = torch_em.transform.label.BoundaryTransform(add_binary_target=True)
        kwargs = util.update_kwargs(
            kwargs,
            'label_transform',
            boundary_transform,
            msg="Boundaries is set to true, but 'label_transform' is in the kwargs. It will be over-ridden.",
        )
    elif binary:
        kwargs = util.update_kwargs(
            kwargs,
            'label_transform',
            torch_em.transform.label.labels_to_binary,
            msg="Binary is set to true, but 'label_transform' is in the kwargs. It will be over-ridden.",
        )

    # Raw data and labels live in the same HDF5 files, so both use the same paths.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=volume_paths,
        raw_key="raw",
        label_paths=volume_paths,
        label_key=label_key,
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )
    return dataset

Get the UroCell dataset for organelle segmentation in FIB-SEM.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
target: The segmentation target, corresponding to the organelle to segment. Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
patch_shape: The patch shape to use for training.
download: Whether to download the data if it is not present.
offsets: Offset values for affinity computation used as target.
boundaries: Whether to compute boundaries as the target.
binary: Whether to return a binary segmentation target.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_uro_cell_loader( path: Union[os.PathLike, str], target: str, patch_shape: Tuple[int, int, int], batch_size: int, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_uro_cell_loader(
    path: Union[os.PathLike, str],
    target: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs
) -> DataLoader:
    """Create the UroCell dataloader for segmenting one organelle in FIB-SEM volumes.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        target: The segmentation target, corresponding to the organelle to segment.
            Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to return a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
       The DataLoader.
    """
    # Separate the keyword arguments of the dataset from the remaining ones for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_uro_cell_dataset(
        path,
        target,
        patch_shape,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        **dataset_kwargs
    )
    loader = torch_em.get_data_loader(dataset, batch_size=batch_size, **dataloader_kwargs)
    return loader

Get the UroCell dataloader for organelle segmentation in FIB-SEM.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
target: The segmentation target, corresponding to the organelle to segment. Available organelles are 'fv', 'golgi', 'lyso' and 'mito'.
patch_shape: The patch shape to use for training.
batch_size: The batch size for training.
download: Whether to download the data if it is not present.
offsets: Offset values for affinity computation used as target.
boundaries: Whether to compute boundaries as the target.
binary: Whether to return a binary segmentation target.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.