torch_em.data.datasets.light_microscopy.wing_disc

The Wing Disc dataset contains annotations for 3D cell instance segmentation in confocal microscopy images of Drosophila wing discs.

The dataset is located at https://www.ebi.ac.uk/biostudies/BioImages/studies/S-BIAD843. This dataset is from the publication https://www.nature.com/articles/s44303-025-00099-7. Please cite it if you use this dataset in your research.

  1"""The Wing Disc dataset contains annotations for 3D cell instance segmentation
  2in confocal microscopy images of Drosophila wing discs.
  3
  4The dataset is located at https://www.ebi.ac.uk/biostudies/BioImages/studies/S-BIAD843.
  5This dataset is from the publication https://www.nature.com/articles/s44303-025-00099-7.
  6Please cite it if you use this dataset in your research.
  7"""
  8
  9import os
 10from glob import glob
 11from natsort import natsorted
 12from typing import Union, Tuple, Optional, List
 13
 14import numpy as np
 15
 16from torch.utils.data import Dataset, DataLoader
 17
 18import torch_em
 19
 20from .. import util
 21
 22
# Base URL that the archive file names are appended to when building download URLs.
BASE_URL = "https://ftp.ebi.ac.uk/biostudies/fire/S-BIAD/843/S-BIAD843/Files"

# Volumes that make up the dataset. Each key is a volume name from which the archive
# names ("{name}.ome.zarr.zip", "{name}_segmented.ome.zarr.zip") and the HDF5 file name
# ("{name}.h5") are derived. The values are not read by the functions visible here;
# presumably they record the imaging modality of each volume (TODO confirm).
VOLUMES = {
    "WD1_15-02_WT_confocalonly": "confocal",
    "WD2.1_21-02_WT_confocalonly": "confocal",
    "WD1.1_17-03_WT_MP": "multiphoton",
    "WD3.2_21-03_WT_MP": "multiphoton",
}
 31
 32
 33def _preprocess_volumes(path, data_dir):
 34    """Convert OME-Zarr volumes to HDF5 files with raw and labels datasets."""
 35    import h5py
 36    import zarr
 37
 38    os.makedirs(data_dir, exist_ok=True)
 39
 40    zarr_dir = os.path.join(path, "zarr")
 41
 42    for name in VOLUMES:
 43        h5_path = os.path.join(data_dir, f"{name}.h5")
 44        if os.path.exists(h5_path):
 45            continue
 46
 47        # Read raw volume: shape (1, 1, Z, Y, X) and squeeze to (Z, Y, X).
 48        raw_zarr = os.path.join(zarr_dir, f"{name}.zarr", "0", "0")
 49        raw = np.array(zarr.open(store=zarr.storage.LocalStore(raw_zarr)))
 50        raw = raw.squeeze()
 51
 52        # Read segmentation: shape (Z, 1, 1, Y, X) and squeeze to (Z, Y, X).
 53        seg_zarr = os.path.join(zarr_dir, f"{name}_segmented.zarr", "0", "0")
 54        seg = np.array(zarr.open(store=zarr.storage.LocalStore(seg_zarr)))
 55        seg = seg.squeeze().astype("uint32")
 56
 57        assert raw.shape == seg.shape, f"Shape mismatch for {name}: raw={raw.shape}, seg={seg.shape}"
 58
 59        with h5py.File(h5_path, "w") as f:
 60            f.create_dataset("raw", data=raw, compression="gzip")
 61            f.create_dataset("labels", data=seg, compression="gzip")
 62
 63
 64def get_wing_disc_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 65    """Download the Wing Disc dataset.
 66
 67    Args:
 68        path: Filepath to a folder where the downloaded data will be saved.
 69        download: Whether to download the data if it is not present.
 70
 71    Returns:
 72        The filepath to the preprocessed data directory.
 73    """
 74    data_dir = os.path.join(path, "data")
 75    if os.path.exists(data_dir) and len(glob(os.path.join(data_dir, "*.h5"))) == len(VOLUMES):
 76        return data_dir
 77
 78    zarr_dir = os.path.join(path, "zarr")
 79    os.makedirs(zarr_dir, exist_ok=True)
 80
 81    for name in VOLUMES:
 82        zarr_path = os.path.join(zarr_dir, f"{name}.zarr")
 83        if not os.path.exists(zarr_path):
 84            zip_fname = f"{name}.ome.zarr.zip"
 85            zip_path = os.path.join(path, zip_fname)
 86            url = f"{BASE_URL}/{zip_fname}"
 87            util.download_source(path=zip_path, url=url, download=download, checksum=None)
 88            util.unzip(zip_path=zip_path, dst=zarr_dir)
 89
 90        seg_zarr_path = os.path.join(zarr_dir, f"{name}_segmented.zarr")
 91        if not os.path.exists(seg_zarr_path):
 92            seg_zip_fname = f"{name}_segmented.ome.zarr.zip"
 93            seg_zip_path = os.path.join(path, seg_zip_fname)
 94            seg_url = f"{BASE_URL}/{seg_zip_fname}"
 95            util.download_source(path=seg_zip_path, url=seg_url, download=download, checksum=None)
 96            util.unzip(zip_path=seg_zip_path, dst=zarr_dir)
 97
 98    _preprocess_volumes(path, data_dir)
 99
100    return data_dir
101
102
def get_wing_disc_paths(
    path: Union[os.PathLike, str],
    download: bool = False,
) -> List[str]:
    """Collect the HDF5 files of the Wing Disc dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Naturally sorted list of the `.h5` filepaths in the preprocessed data directory.
    """
    # Make sure the data is available, then look for every HDF5 file in its folder.
    h5_pattern = os.path.join(get_wing_disc_data(path, download), "*.h5")
    volume_files = natsorted(glob(h5_pattern))
    assert len(volume_files) > 0
    return volume_files
120
121
def get_wing_disc_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the Wing Disc dataset for 3D cell segmentation in Drosophila wing discs.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    volume_files = get_wing_disc_paths(path, download)

    # Let the shared helpers prepare the transform-related keyword arguments.
    ds_kwargs = util.ensure_transforms(ndim=3, **kwargs)
    ds_kwargs, _ = util.add_instance_label_transform(
        ds_kwargs,
        add_binary_target=True,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
    )

    # Raw data and labels are read from the same HDF5 files, under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=volume_files,
        raw_key="raw",
        label_paths=volume_files,
        label_key="labels",
        patch_shape=patch_shape,
        ndim=3,
        **ds_kwargs
    )
161
162
def get_wing_disc_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the Wing Disc dataloader for 3D cell segmentation in Drosophila wing discs.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    target_options = dict(offsets=offsets, boundaries=boundaries, binary=binary)
    wing_disc_ds = get_wing_disc_dataset(
        path=path, patch_shape=patch_shape, download=download, **target_options, **dataset_kwargs
    )

    return torch_em.get_data_loader(dataset=wing_disc_ds, batch_size=batch_size, **dataloader_kwargs)
# Base URL that the archive file names are appended to when building download URLs.
BASE_URL = 'https://ftp.ebi.ac.uk/biostudies/fire/S-BIAD/843/S-BIAD843/Files'
# Volume names used to derive archive and HDF5 file names; the values are not read by the
# functions visible here and presumably record the imaging modality (TODO confirm).
VOLUMES = {'WD1_15-02_WT_confocalonly': 'confocal', 'WD2.1_21-02_WT_confocalonly': 'confocal', 'WD1.1_17-03_WT_MP': 'multiphoton', 'WD3.2_21-03_WT_MP': 'multiphoton'}
def get_wing_disc_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 65def get_wing_disc_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 66    """Download the Wing Disc dataset.
 67
 68    Args:
 69        path: Filepath to a folder where the downloaded data will be saved.
 70        download: Whether to download the data if it is not present.
 71
 72    Returns:
 73        The filepath to the preprocessed data directory.
 74    """
 75    data_dir = os.path.join(path, "data")
 76    if os.path.exists(data_dir) and len(glob(os.path.join(data_dir, "*.h5"))) == len(VOLUMES):
 77        return data_dir
 78
 79    zarr_dir = os.path.join(path, "zarr")
 80    os.makedirs(zarr_dir, exist_ok=True)
 81
 82    for name in VOLUMES:
 83        zarr_path = os.path.join(zarr_dir, f"{name}.zarr")
 84        if not os.path.exists(zarr_path):
 85            zip_fname = f"{name}.ome.zarr.zip"
 86            zip_path = os.path.join(path, zip_fname)
 87            url = f"{BASE_URL}/{zip_fname}"
 88            util.download_source(path=zip_path, url=url, download=download, checksum=None)
 89            util.unzip(zip_path=zip_path, dst=zarr_dir)
 90
 91        seg_zarr_path = os.path.join(zarr_dir, f"{name}_segmented.zarr")
 92        if not os.path.exists(seg_zarr_path):
 93            seg_zip_fname = f"{name}_segmented.ome.zarr.zip"
 94            seg_zip_path = os.path.join(path, seg_zip_fname)
 95            seg_url = f"{BASE_URL}/{seg_zip_fname}"
 96            util.download_source(path=seg_zip_path, url=seg_url, download=download, checksum=None)
 97            util.unzip(zip_path=seg_zip_path, dst=zarr_dir)
 98
 99    _preprocess_volumes(path, data_dir)
100
101    return data_dir

Download the Wing Disc dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the preprocessed data directory.

def get_wing_disc_paths(path: Union[os.PathLike, str], download: bool = False) -> List[str]:
def get_wing_disc_paths(
    path: Union[os.PathLike, str],
    download: bool = False,
) -> List[str]:
    """Collect the HDF5 files of the Wing Disc dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Naturally sorted list of the `.h5` filepaths in the preprocessed data directory.
    """
    # Make sure the data is available, then look for every HDF5 file in its folder.
    h5_pattern = os.path.join(get_wing_disc_data(path, download), "*.h5")
    volume_files = natsorted(glob(h5_pattern))
    assert len(volume_files) > 0
    return volume_files

Get paths to the Wing Disc data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the stored data.

def get_wing_disc_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_wing_disc_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the Wing Disc dataset for 3D cell segmentation in Drosophila wing discs.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    volume_files = get_wing_disc_paths(path, download)

    # Let the shared helpers prepare the transform-related keyword arguments.
    ds_kwargs = util.ensure_transforms(ndim=3, **kwargs)
    ds_kwargs, _ = util.add_instance_label_transform(
        ds_kwargs,
        add_binary_target=True,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
    )

    # Raw data and labels are read from the same HDF5 files, under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=volume_files,
        raw_key="raw",
        label_paths=volume_files,
        label_key="labels",
        patch_shape=patch_shape,
        ndim=3,
        **ds_kwargs
    )

Get the Wing Disc dataset for 3D cell segmentation in Drosophila wing discs.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_wing_disc_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int, int], offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_wing_disc_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the Wing Disc dataloader for 3D cell segmentation in Drosophila wing discs.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    target_options = dict(offsets=offsets, boundaries=boundaries, binary=binary)
    wing_disc_ds = get_wing_disc_dataset(
        path=path, patch_shape=patch_shape, download=download, **target_options, **dataset_kwargs
    )

    return torch_em.get_data_loader(dataset=wing_disc_ds, batch_size=batch_size, **dataloader_kwargs)

Get the Wing Disc dataloader for 3D cell segmentation in Drosophila wing discs.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • offsets: Offset values for affinity computation used as target.
  • boundaries: Whether to compute boundaries as the target.
  • binary: Whether to use a binary segmentation target.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.