torch_em.data.datasets.light_microscopy.slimia
The SLiMIA dataset contains annotations for spheroid segmentation in light microscopy images from 9 different microscopes and 47 cell lines.
NOTE: The annotations are semantic segmentation of spheroids.
The dataset provides images with binary manual segmentation masks of spheroids formed using liquid overlay and hanging drop techniques.
The dataset is located at https://figshare.com/collections/The_Spheroid_Light_Microscopy_Image_Atlas_SLiMIA_for_morphometrical_analysis_of_three_dimensional_cell_cultures/7486311. This dataset is from the publication https://doi.org/10.1038/s41597-025-04441-x. Please cite it if you use this dataset in your research.
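A minimal quickstart sketch: the target folder ./data/slimia, the patch shape, and the microscope choice are illustrative placeholders, and the import assumes the module path shown in the title. The full module source follows below.

import torch_em.data.datasets.light_microscopy.slimia as slimia

print(slimia.MICROSCOPES)  # the 9 supported microscopes

# Download one microscope's data on first use and build a 2D training loader.
loader = slimia.get_slimia_loader(
    path="./data/slimia",      # placeholder output folder
    batch_size=4,
    patch_shape=(512, 512),    # illustrative patch size
    microscope="OperaPhenix",
    download=True,
)
print(len(loader))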
1"""The SLiMIA dataset contains annotations for spheroid segmentation 2in light microscopy images from 9 different microscopes and 47 cell lines. 3 4NOTE: The annotations are semantic segmentation of spheroids. 5 6The dataset provides images with binary manual segmentation masks of spheroids 7formed using liquid overlay and hanging drop techniques. 8 9The dataset is located at 10https://figshare.com/collections/The_Spheroid_Light_Microscopy_Image_Atlas_SLiMIA_for_morphometrical_analysis_of_three_dimensional_cell_cultures/7486311. 11This dataset is from the publication https://doi.org/10.1038/s41597-025-04441-x. 12Please cite it if you use this dataset in your research. 13""" 14 15import os 16from glob import glob 17from typing import Union, Tuple, List, Optional 18 19from torch.utils.data import Dataset, DataLoader 20 21import torch_em 22 23from .. import util 24 25 26URLS = { 27 "OperaPhenix": "https://ndownloader.figshare.com/files/50120850", 28 "OlympusIX05": "https://ndownloader.figshare.com/files/50120853", 29 "Axiovert200M": "https://ndownloader.figshare.com/files/50122224", 30 "Cytation5": "https://ndownloader.figshare.com/files/50122194", 31 "LeicaDMi3000B": "https://ndownloader.figshare.com/files/50122473", 32 "Axiovert200": "https://ndownloader.figshare.com/files/50134212", 33 "IncucyteS3": "https://ndownloader.figshare.com/files/50134218", 34 "LeicaDMi1": "https://ndownloader.figshare.com/files/50134776", 35 "IncucyteZOOM": "https://ndownloader.figshare.com/files/50136054", 36} 37 38MICROSCOPES = list(URLS.keys()) 39 40 41def _create_h5_data(path, microscope): 42 """Create h5 files with raw images and binary spheroid labels.""" 43 import h5py 44 import imageio.v3 as imageio 45 from tqdm import tqdm 46 47 h5_dir = os.path.join(path, "h5_data", microscope) 48 os.makedirs(h5_dir, exist_ok=True) 49 50 micro_dir = os.path.join(path, microscope) 51 image_dir = os.path.join(micro_dir, "Images") 52 seg_dir = os.path.join(micro_dir, "Manual segmentations") 53 54 cell_lines = sorted(os.listdir(image_dir)) 55 56 for cell_line in cell_lines: 57 cl_image_dir = os.path.join(image_dir, cell_line) 58 cl_seg_dir = os.path.join(seg_dir, cell_line) 59 60 if not os.path.isdir(cl_image_dir) or not os.path.isdir(cl_seg_dir): 61 continue 62 63 image_paths = sorted(glob(os.path.join(cl_image_dir, "*.tiff"))) 64 65 for image_path in tqdm(image_paths, desc=f"Creating h5 for {microscope}/{cell_line}"): 66 # Match image to mask: image is .ome.tiff, mask is .tiff with the same base name. 67 base_name = os.path.basename(image_path).replace(".ome.tiff", "").replace(".tiff", "") 68 h5_path = os.path.join(h5_dir, f"{base_name}.h5") 69 70 if os.path.exists(h5_path): 71 continue 72 73 # Try both naming conventions for the mask. 74 seg_path = os.path.join(cl_seg_dir, f"{base_name}.tiff") 75 if not os.path.exists(seg_path): 76 seg_path = os.path.join(cl_seg_dir, f"{base_name}.ome.tiff") 77 78 if not os.path.exists(seg_path): 79 continue 80 81 raw = imageio.imread(image_path) 82 seg = imageio.imread(seg_path) 83 84 # Convert binary mask (0/255) to labels (0/1). 85 labels = (seg > 0).astype("int64") 86 87 with h5py.File(h5_path, "w") as f: 88 f.create_dataset("raw", data=raw, compression="gzip") 89 f.create_dataset("labels", data=labels, compression="gzip") 90 91 return h5_dir 92 93 94def get_slimia_data( 95 path: Union[os.PathLike, str], 96 microscope: Optional[Union[str, List[str]]] = None, 97 download: bool = False, 98) -> str: 99 """Download the SLiMIA dataset. 
100 101 Args: 102 path: Filepath to a folder where the downloaded data will be saved. 103 microscope: The microscope(s) to download data for. If None, all microscopes will be downloaded. 104 Available microscopes: OperaPhenix, OlympusIX05, Axiovert200M, Cytation5, 105 LeicaDMi3000B, Axiovert200, IncucyteS3, LeicaDMi1, IncucyteZOOM. 106 download: Whether to download the data if it is not present. 107 108 Returns: 109 The filepath to the directory with the data. 110 """ 111 if microscope is None: 112 microscope = MICROSCOPES 113 elif isinstance(microscope, str): 114 microscope = [microscope] 115 116 for micro in microscope: 117 assert micro in MICROSCOPES, f"'{micro}' is not a valid microscope. Choose from {MICROSCOPES}." 118 119 micro_dir = os.path.join(path, micro) 120 if os.path.exists(micro_dir): 121 continue 122 123 os.makedirs(path, exist_ok=True) 124 zip_path = os.path.join(path, f"{micro}.zip") 125 util.download_source(path=zip_path, url=URLS[micro], download=download, checksum=None) 126 util.unzip(zip_path=zip_path, dst=path) 127 128 return path 129 130 131def get_slimia_paths( 132 path: Union[os.PathLike, str], 133 microscope: Optional[Union[str, List[str]]] = None, 134 download: bool = False, 135) -> List[str]: 136 """Get paths to the SLiMIA data. 137 138 Args: 139 path: Filepath to a folder where the downloaded data will be saved. 140 microscope: The microscope(s) to use. If None, all microscopes will be used. 141 download: Whether to download the data if it is not present. 142 143 Returns: 144 List of filepaths for the h5 data. 145 """ 146 from natsort import natsorted 147 148 if microscope is None: 149 microscope = MICROSCOPES 150 elif isinstance(microscope, str): 151 microscope = [microscope] 152 153 get_slimia_data(path, microscope, download) 154 155 all_h5_paths = [] 156 for micro in microscope: 157 h5_dir = os.path.join(path, "h5_data", micro) 158 if not os.path.exists(h5_dir) or len(glob(os.path.join(h5_dir, "*.h5"))) == 0: 159 _create_h5_data(path, micro) 160 161 h5_paths = glob(os.path.join(h5_dir, "*.h5")) 162 all_h5_paths.extend(h5_paths) 163 164 assert len(all_h5_paths) > 0, f"No data found for microscope(s) '{microscope}'" 165 166 return natsorted(all_h5_paths) 167 168 169def get_slimia_dataset( 170 path: Union[os.PathLike, str], 171 patch_shape: Tuple[int, int], 172 microscope: Optional[Union[str, List[str]]] = None, 173 download: bool = False, 174 **kwargs 175) -> Dataset: 176 """Get the SLiMIA dataset for spheroid segmentation. 177 178 Args: 179 path: Filepath to a folder where the downloaded data will be saved. 180 patch_shape: The patch shape to use for training. 181 microscope: The microscope(s) to use. If None, all microscopes will be used. 182 download: Whether to download the data if it is not present. 183 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 184 185 Returns: 186 The segmentation dataset. 
187 """ 188 h5_paths = get_slimia_paths(path, microscope, download) 189 190 kwargs, _ = util.add_instance_label_transform( 191 kwargs, add_binary_target=True, 192 ) 193 kwargs = util.ensure_transforms(ndim=2, **kwargs) 194 195 return torch_em.default_segmentation_dataset( 196 raw_paths=h5_paths, 197 raw_key="raw", 198 label_paths=h5_paths, 199 label_key="labels", 200 patch_shape=patch_shape, 201 ndim=2, 202 **kwargs 203 ) 204 205 206def get_slimia_loader( 207 path: Union[os.PathLike, str], 208 batch_size: int, 209 patch_shape: Tuple[int, int], 210 microscope: Optional[Union[str, List[str]]] = None, 211 download: bool = False, 212 **kwargs 213) -> DataLoader: 214 """Get the SLiMIA dataloader for spheroid segmentation. 215 216 Args: 217 path: Filepath to a folder where the downloaded data will be saved. 218 batch_size: The batch size for training. 219 patch_shape: The patch shape to use for training. 220 microscope: The microscope(s) to use. If None, all microscopes will be used. 221 download: Whether to download the data if it is not present. 222 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 223 224 Returns: 225 The DataLoader. 226 """ 227 ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) 228 dataset = get_slimia_dataset( 229 path=path, 230 patch_shape=patch_shape, 231 microscope=microscope, 232 download=download, 233 **ds_kwargs, 234 ) 235 return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
def get_slimia_data(path: Union[os.PathLike, str], microscope: Optional[Union[str, List[str]]] = None, download: bool = False) -> str:
Download the SLiMIA dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- microscope: The microscope(s) to download data for. If None, all microscopes will be downloaded. Available microscopes: OperaPhenix, OlympusIX05, Axiovert200M, Cytation5, LeicaDMi3000B, Axiovert200, IncucyteS3, LeicaDMi1, IncucyteZOOM.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the directory with the data.
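A short usage sketch, assuming a placeholder target folder ./data/slimia; the archives are fetched without checksum verification (the function passes checksum=None) and already-extracted microscope folders are skipped.

from torch_em.data.datasets.light_microscopy.slimia import get_slimia_data

# Download and unpack the zip archives for two of the microscopes.
data_root = get_slimia_data(
    path="./data/slimia",
    microscope=["IncucyteS3", "LeicaDMi1"],
    download=True,
)
print(data_root)  # the input path is returned unchanged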
def get_slimia_paths(path: Union[os.PathLike, str], microscope: Optional[Union[str, List[str]]] = None, download: bool = False) -> List[str]:
Get paths to the SLiMIA data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- microscope: The microscope(s) to use. If None, all microscopes will be used.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the h5 data.
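The sketch below lists the converted h5 files for one microscope and inspects the first one; the "raw" and "labels" dataset names come from the conversion helper _create_h5_data above, while the folder path is a placeholder.

import h5py
from torch_em.data.datasets.light_microscopy.slimia import get_slimia_paths

# Triggers download and tiff-to-h5 conversion on first use.
h5_paths = get_slimia_paths("./data/slimia", microscope="Cytation5", download=True)
print(len(h5_paths), "h5 files")

with h5py.File(h5_paths[0], "r") as f:
    print(f["raw"].shape, f["raw"].dtype)        # raw light microscopy image
    print(f["labels"].shape, f["labels"].dtype)  # binary spheroid mask (0 / 1)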
def get_slimia_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, int], microscope: Optional[Union[str, List[str]]] = None, download: bool = False, **kwargs) -> Dataset:
Get the SLiMIA dataset for spheroid segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- microscope: The microscope(s) to use. If None, all microscopes will be used.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
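A minimal sketch of building the dataset and drawing a single sample; the path and patch shape are illustrative, and any extra keyword arguments (e.g. raw_transform) are forwarded to torch_em.default_segmentation_dataset.

from torch_em.data.datasets.light_microscopy.slimia import get_slimia_dataset

dataset = get_slimia_dataset(
    path="./data/slimia",      # placeholder folder
    patch_shape=(512, 512),    # illustrative 2D patch size
    microscope="Axiovert200M",
    download=True,
)
print(len(dataset))

# Each sample is a (raw, labels) pair of patch-sized tensors.
raw, labels = dataset[0]
print(raw.shape, labels.shape)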
def get_slimia_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], microscope: Optional[Union[str, List[str]]] = None, download: bool = False, **kwargs) -> DataLoader:
Get the SLiMIA dataloader for spheroid segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- microscope: The microscope(s) to use. If None, all microscopes will be used.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.
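A usage sketch highlighting how the extra keyword arguments are handled: arguments matching torch_em.default_segmentation_dataset go to the dataset, everything else (e.g. num_workers, shuffle) is passed to the PyTorch DataLoader. The path and shapes are illustrative.

from torch_em.data.datasets.light_microscopy.slimia import get_slimia_loader

loader = get_slimia_loader(
    path="./data/slimia",
    batch_size=8,
    patch_shape=(512, 512),
    microscope=["OperaPhenix", "IncucyteZOOM"],
    download=True,
    num_workers=4,   # forwarded to the DataLoader
    shuffle=True,    # forwarded to the DataLoader
)

# One training batch of raw patches and binary spheroid labels.
raw, labels = next(iter(loader))
print(raw.shape, labels.shape)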