torch_em.data.datasets.light_microscopy.slimia

The SLiMIA dataset contains annotations for spheroid segmentation in light microscopy images from 9 different microscopes and 47 cell lines.

NOTE: The annotations are semantic segmentation masks of spheroids.

The dataset provides images with binary manual segmentation masks of spheroids formed using liquid overlay and hanging drop techniques.

The dataset is located at https://figshare.com/collections/The_Spheroid_Light_Microscopy_Image_Atlas_SLiMIA_for_morphometrical_analysis_of_three_dimensional_cell_cultures/7486311. This dataset is from the publication https://doi.org/10.1038/s41597-025-04441-x. Please cite it if you use this dataset in your research.
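
A minimal usage sketch based on the functions defined in this module (the local folder `./data/slimia`, the patch shape, and the microscope choice are illustrative values, not defaults):

from torch_em.data.datasets.light_microscopy.slimia import get_slimia_loader

# Download the OperaPhenix subset and build a 2D segmentation loader.
loader = get_slimia_loader(
    path="./data/slimia",      # example target folder
    batch_size=4,
    patch_shape=(512, 512),
    microscope="OperaPhenix",
    download=True,
)
for raw, labels in loader:     # labels are binary spheroid masks
    print(raw.shape, labels.shape)
    break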

  1"""The SLiMIA dataset contains annotations for spheroid segmentation
  2in light microscopy images from 9 different microscopes and 47 cell lines.
  3
  4NOTE: The annotations are semantic segmentation of spheroids.
  5
  6The dataset provides images with binary manual segmentation masks of spheroids
  7formed using liquid overlay and hanging drop techniques.
  8
  9The dataset is located at
 10https://figshare.com/collections/The_Spheroid_Light_Microscopy_Image_Atlas_SLiMIA_for_morphometrical_analysis_of_three_dimensional_cell_cultures/7486311.
 11This dataset is from the publication https://doi.org/10.1038/s41597-025-04441-x.
 12Please cite it if you use this dataset in your research.
 13"""
 14
 15import os
 16from glob import glob
 17from typing import Union, Tuple, List, Optional
 18
 19from torch.utils.data import Dataset, DataLoader
 20
 21import torch_em
 22
 23from .. import util
 24
 25
 26URLS = {
 27    "OperaPhenix": "https://ndownloader.figshare.com/files/50120850",
 28    "OlympusIX05": "https://ndownloader.figshare.com/files/50120853",
 29    "Axiovert200M": "https://ndownloader.figshare.com/files/50122224",
 30    "Cytation5": "https://ndownloader.figshare.com/files/50122194",
 31    "LeicaDMi3000B": "https://ndownloader.figshare.com/files/50122473",
 32    "Axiovert200": "https://ndownloader.figshare.com/files/50134212",
 33    "IncucyteS3": "https://ndownloader.figshare.com/files/50134218",
 34    "LeicaDMi1": "https://ndownloader.figshare.com/files/50134776",
 35    "IncucyteZOOM": "https://ndownloader.figshare.com/files/50136054",
 36}
 37
 38MICROSCOPES = list(URLS.keys())
 39
 40
def _create_h5_data(path, microscope):
    """Create h5 files with raw images and binary spheroid labels."""
    import h5py
    import imageio.v3 as imageio
    from tqdm import tqdm

    h5_dir = os.path.join(path, "h5_data", microscope)
    os.makedirs(h5_dir, exist_ok=True)

    micro_dir = os.path.join(path, microscope)
    image_dir = os.path.join(micro_dir, "Images")
    seg_dir = os.path.join(micro_dir, "Manual segmentations")

    cell_lines = sorted(os.listdir(image_dir))

    for cell_line in cell_lines:
        cl_image_dir = os.path.join(image_dir, cell_line)
        cl_seg_dir = os.path.join(seg_dir, cell_line)

        if not os.path.isdir(cl_image_dir) or not os.path.isdir(cl_seg_dir):
            continue

        image_paths = sorted(glob(os.path.join(cl_image_dir, "*.tiff")))

        for image_path in tqdm(image_paths, desc=f"Creating h5 for {microscope}/{cell_line}"):
            # Match image to mask: image is .ome.tiff, mask is .tiff with the same base name.
            base_name = os.path.basename(image_path).replace(".ome.tiff", "").replace(".tiff", "")
            h5_path = os.path.join(h5_dir, f"{base_name}.h5")

            if os.path.exists(h5_path):
                continue

            # Try both naming conventions for the mask.
            seg_path = os.path.join(cl_seg_dir, f"{base_name}.tiff")
            if not os.path.exists(seg_path):
                seg_path = os.path.join(cl_seg_dir, f"{base_name}.ome.tiff")

            if not os.path.exists(seg_path):
                continue

            raw = imageio.imread(image_path)
            seg = imageio.imread(seg_path)

            # Convert binary mask (0/255) to labels (0/1).
            labels = (seg > 0).astype("int64")

            with h5py.File(h5_path, "w") as f:
                f.create_dataset("raw", data=raw, compression="gzip")
                f.create_dataset("labels", data=labels, compression="gzip")

    return h5_dir


def get_slimia_data(
    path: Union[os.PathLike, str],
    microscope: Optional[Union[str, List[str]]] = None,
    download: bool = False,
) -> str:
    """Download the SLiMIA dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        microscope: The microscope(s) to download data for. If None, all microscopes will be downloaded.
            Available microscopes: OperaPhenix, OlympusIX05, Axiovert200M, Cytation5,
            LeicaDMi3000B, Axiovert200, IncucyteS3, LeicaDMi1, IncucyteZOOM.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the directory with the data.
    """
    if microscope is None:
        microscope = MICROSCOPES
    elif isinstance(microscope, str):
        microscope = [microscope]

    for micro in microscope:
        assert micro in MICROSCOPES, f"'{micro}' is not a valid microscope. Choose from {MICROSCOPES}."

        micro_dir = os.path.join(path, micro)
        if os.path.exists(micro_dir):
            continue

        os.makedirs(path, exist_ok=True)
        zip_path = os.path.join(path, f"{micro}.zip")
        util.download_source(path=zip_path, url=URLS[micro], download=download, checksum=None)
        util.unzip(zip_path=zip_path, dst=path)

    return path


def get_slimia_paths(
    path: Union[os.PathLike, str],
    microscope: Optional[Union[str, List[str]]] = None,
    download: bool = False,
) -> List[str]:
    """Get paths to the SLiMIA data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        microscope: The microscope(s) to use. If None, all microscopes will be used.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the h5 data.
    """
    from natsort import natsorted

    if microscope is None:
        microscope = MICROSCOPES
    elif isinstance(microscope, str):
        microscope = [microscope]

    get_slimia_data(path, microscope, download)

    all_h5_paths = []
    for micro in microscope:
        h5_dir = os.path.join(path, "h5_data", micro)
        if not os.path.exists(h5_dir) or len(glob(os.path.join(h5_dir, "*.h5"))) == 0:
            _create_h5_data(path, micro)

        h5_paths = glob(os.path.join(h5_dir, "*.h5"))
        all_h5_paths.extend(h5_paths)

    assert len(all_h5_paths) > 0, f"No data found for microscope(s) '{microscope}'"

    return natsorted(all_h5_paths)


def get_slimia_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    microscope: Optional[Union[str, List[str]]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the SLiMIA dataset for spheroid segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        microscope: The microscope(s) to use. If None, all microscopes will be used.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    h5_paths = get_slimia_paths(path, microscope, download)

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True,
    )
    kwargs = util.ensure_transforms(ndim=2, **kwargs)

    return torch_em.default_segmentation_dataset(
        raw_paths=h5_paths,
        raw_key="raw",
        label_paths=h5_paths,
        label_key="labels",
        patch_shape=patch_shape,
        ndim=2,
        **kwargs
    )


def get_slimia_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    microscope: Optional[Union[str, List[str]]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the SLiMIA dataloader for spheroid segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        microscope: The microscope(s) to use. If None, all microscopes will be used.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_slimia_dataset(
        path=path,
        patch_shape=patch_shape,
        microscope=microscope,
        download=download,
        **ds_kwargs,
    )
    return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
URLS = {'OperaPhenix': 'https://ndownloader.figshare.com/files/50120850', 'OlympusIX05': 'https://ndownloader.figshare.com/files/50120853', 'Axiovert200M': 'https://ndownloader.figshare.com/files/50122224', 'Cytation5': 'https://ndownloader.figshare.com/files/50122194', 'LeicaDMi3000B': 'https://ndownloader.figshare.com/files/50122473', 'Axiovert200': 'https://ndownloader.figshare.com/files/50134212', 'IncucyteS3': 'https://ndownloader.figshare.com/files/50134218', 'LeicaDMi1': 'https://ndownloader.figshare.com/files/50134776', 'IncucyteZOOM': 'https://ndownloader.figshare.com/files/50136054'}
MICROSCOPES = ['OperaPhenix', 'OlympusIX05', 'Axiovert200M', 'Cytation5', 'LeicaDMi3000B', 'Axiovert200', 'IncucyteS3', 'LeicaDMi1', 'IncucyteZOOM']
def get_slimia_data(path: Union[os.PathLike, str], microscope: Optional[Union[str, List[str]]] = None, download: bool = False) -> str:

Download the SLiMIA dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • microscope: The microscope(s) to download data for. If None, all microscopes will be downloaded. Available microscopes: OperaPhenix, OlympusIX05, Axiovert200M, Cytation5, LeicaDMi3000B, Axiovert200, IncucyteS3, LeicaDMi1, IncucyteZOOM.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the directory with the data.
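
For example, to fetch only the IncucyteS3 subset (the target folder below is an illustrative choice):

from torch_em.data.datasets.light_microscopy.slimia import get_slimia_data

# Downloads and unzips the IncucyteS3 data if it is not already present.
data_root = get_slimia_data("./data/slimia", microscope="IncucyteS3", download=True)
# Per the download logic above, the images and manual segmentations are then
# expected under "./data/slimia/IncucyteS3".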

def get_slimia_paths(path: Union[os.PathLike, str], microscope: Optional[Union[str, List[str]]] = None, download: bool = False) -> List[str]:

Get paths to the SLiMIA data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • microscope: The microscope(s) to use. If None, all microscopes will be used.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the h5 data.
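
A short sketch of how the returned h5 files can be inspected (the data folder is a placeholder; h5py is required):

import h5py
from torch_em.data.datasets.light_microscopy.slimia import get_slimia_paths

h5_paths = get_slimia_paths("./data/slimia", microscope="LeicaDMi1", download=True)
with h5py.File(h5_paths[0], "r") as f:
    raw = f["raw"][:]        # light microscopy image
    labels = f["labels"][:]  # binary spheroid mask (0 = background, 1 = spheroid)
print(raw.shape, labels.shape)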

def get_slimia_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, int], microscope: Optional[Union[str, List[str]]] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:

Get the SLiMIA dataset for spheroid segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • microscope: The microscope(s) to use. If None, all microscopes will be used.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.
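
For instance, a 2D dataset restricted to two microscopes (the folder and patch shape are example values):

from torch_em.data.datasets.light_microscopy.slimia import get_slimia_dataset

dataset = get_slimia_dataset(
    path="./data/slimia",
    patch_shape=(512, 512),
    microscope=["OperaPhenix", "Cytation5"],
    download=True,
)
print(len(dataset))  # number of samples exposed by the dataset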

def get_slimia_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], microscope: Optional[Union[str, List[str]]] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:

Get the SLiMIA dataloader for spheroid segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • microscope: The microscope(s) to use. If None, all microscopes will be used.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.
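
A training-oriented sketch (batch size, patch shape, and num_workers are example values; DataLoader keyword arguments such as num_workers are forwarded to the PyTorch DataLoader, as described above):

from torch_em.data.datasets.light_microscopy.slimia import get_slimia_loader

loader = get_slimia_loader(
    path="./data/slimia",
    batch_size=8,
    patch_shape=(256, 256),
    microscope=None,   # None selects all nine microscopes
    download=True,
    num_workers=4,     # forwarded to the PyTorch DataLoader
)
raw_batch, label_batch = next(iter(loader))
print(raw_batch.shape, label_batch.shape)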