torch_em.data.datasets.light_microscopy.neurosphere
The Neurosphere dataset contains a 3D fluorescence light-sheet microscopy image of a cancer cell neurosphere with ground truth instance segmentation from the OpenSegSPIM analysis pipeline.
The dataset consists of a single volume of approximately 115 x 150 x 150 voxels.
NOTE: The segmentations are pixelated at the boundaries and do not exactly match the structures visible in the image.
The dataset is located at https://sourceforge.net/projects/opensegspim/. This dataset is from the publication https://doi.org/10.1093/bioinformatics/btw093. Please cite it if you use this dataset in your research.
"""The Neurosphere dataset contains a 3D fluorescence light-sheet microscopy image
of a cancer cell neurosphere with ground truth instance segmentation from the
OpenSegSPIM analysis pipeline.

The dataset consists of a single volume of approximately 115 x 150 x 150 voxels.

NOTE: The segmentations are pixelated at the boundaries and do not exactly match
the structures visible in the image.

The dataset is located at https://sourceforge.net/projects/opensegspim/.
This dataset is from the publication https://doi.org/10.1093/bioinformatics/btw093.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import List, Tuple, Union

import numpy as np
from scipy.ndimage import binary_fill_holes

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


RAW_URL = "https://downloads.sourceforge.net/project/opensegspim/Sample%20Data/Neurosphere_Dataset.zip"
LABEL_URL = "https://downloads.sourceforge.net/project/opensegspim/Sample%20Data/Neurosphere_OpenSegSPIM.zip"
# No checksums are recorded for the two archives; None is passed on to `util.download_source`.
RAW_CHECKSUM = None
LABEL_CHECKSUM = None


def get_neurosphere_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the Neurosphere dataset.

    The raw and label archives are fetched into `path` and both are extracted
    into the sub-folder `neurosphere`. If that sub-folder already exists the
    function returns immediately without downloading anything.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the extracted data directory.
    """
    data_dir = os.path.join(path, "neurosphere")
    if os.path.exists(data_dir):
        return data_dir

    # Only the parent folder is created before downloading. The existence of
    # 'data_dir' is used above as the marker for "data is available", so it must
    # not be created before the archives are actually there: otherwise an
    # exception raised by the download step would leave an empty folder behind
    # and every later call would return early without any data.
    os.makedirs(path, exist_ok=True)

    raw_zip = os.path.join(path, "Neurosphere_Dataset.zip")
    label_zip = os.path.join(path, "Neurosphere_OpenSegSPIM.zip")

    util.download_source(raw_zip, RAW_URL, download, checksum=RAW_CHECKSUM)
    util.download_source(label_zip, LABEL_URL, download, checksum=LABEL_CHECKSUM)

    os.makedirs(data_dir, exist_ok=True)
    util.unzip(raw_zip, data_dir)
    util.unzip(label_zip, data_dir)

    return data_dir


def _fill_labels(data_dir: str) -> str:
    """Convert thin-shell contour labels to filled 3D instance segmentations.

    Loads Nucleisegmented2.tif, applies binary_fill_holes per instance,
    renumbers to sequential IDs (1, 2, 3 ...), and saves as filled_labels.tif.
    The result is cached: if filled_labels.tif already exists it is reused.

    Args:
        data_dir: The neurosphere data directory.

    Returns:
        Path to the filled label file.

    Raises:
        RuntimeError: If 'Nucleisegmented2.tif' is not found below `data_dir`.
    """
    import imageio.v3 as imageio

    filled_path = os.path.join(data_dir, "filled_labels.tif")
    if os.path.exists(filled_path):
        return filled_path

    # The label file is searched recursively; the first match in natural sort order is used.
    label_paths = natsorted(glob(os.path.join(data_dir, "**", "Nucleisegmented2.tif"), recursive=True))
    if len(label_paths) == 0:
        raise RuntimeError(f"Label file 'Nucleisegmented2.tif' not found in {data_dir}.")

    raw_labels = imageio.imread(label_paths[0])
    instance_ids = np.unique(raw_labels)
    instance_ids = instance_ids[instance_ids != 0]  # 0 is treated as background

    filled = np.zeros(raw_labels.shape, dtype=np.int32)
    for new_id, val in enumerate(instance_ids, start=1):
        # Fill each instance separately. Where filled masks overlap,
        # the instance processed later overwrites the earlier one.
        mask = binary_fill_holes(raw_labels == val)
        filled[mask] = new_id

    imageio.imwrite(filled_path, filled)
    return filled_path


def get_neurosphere_paths(
    path: Union[os.PathLike, str], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the Neurosphere data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        RuntimeError: If 'OriginalStack.tif' is not found directly in the data directory.
    """
    data_dir = get_neurosphere_data(path, download)

    raw_paths = natsorted(glob(os.path.join(data_dir, "OriginalStack.tif")))
    if len(raw_paths) == 0:
        raise RuntimeError(
            f"Raw image 'OriginalStack.tif' not found in {data_dir}. "
            "Please check the dataset structure after downloading."
        )

    # The labels used for training are the filled version of the contour labels.
    filled_label_path = _fill_labels(data_dir)
    label_paths = [filled_label_path]

    return raw_paths, label_paths


def get_neurosphere_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Get the Neurosphere dataset for 3D cell instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_neurosphere_paths(path, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs,
    )


def get_neurosphere_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the Neurosphere dataloader for 3D cell instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_neurosphere_dataset(path, patch_shape, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def get_neurosphere_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the Neurosphere dataset.

    The raw and label archives are fetched into `path` and both are extracted
    into the sub-folder `neurosphere`. If that sub-folder already exists the
    function returns immediately without downloading anything.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the extracted data directory.
    """
    data_dir = os.path.join(path, "neurosphere")
    if os.path.exists(data_dir):
        return data_dir

    # Only the parent folder is created before downloading. The existence of
    # 'data_dir' is used above as the marker for "data is available", so it must
    # not be created before the archives are actually there: otherwise an
    # exception raised by the download step would leave an empty folder behind
    # and every later call would return early without any data.
    os.makedirs(path, exist_ok=True)

    raw_zip = os.path.join(path, "Neurosphere_Dataset.zip")
    label_zip = os.path.join(path, "Neurosphere_OpenSegSPIM.zip")

    util.download_source(raw_zip, RAW_URL, download, checksum=RAW_CHECKSUM)
    util.download_source(label_zip, LABEL_URL, download, checksum=LABEL_CHECKSUM)

    os.makedirs(data_dir, exist_ok=True)
    util.unzip(raw_zip, data_dir)
    util.unzip(label_zip, data_dir)

    return data_dir
Download the Neurosphere dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the extracted data directory.
def get_neurosphere_paths(
    path: Union[os.PathLike, str], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Collect the image and label filepaths of the Neurosphere data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        RuntimeError: If 'OriginalStack.tif' is not found directly in the data directory.
    """
    root = get_neurosphere_data(path, download)

    # The raw volume is looked up directly in the data directory (no recursion).
    raw_pattern = os.path.join(root, "OriginalStack.tif")
    raw_paths = natsorted(glob(raw_pattern))
    if not raw_paths:
        message = (
            f"Raw image 'OriginalStack.tif' not found in {root}. "
            "Please check the dataset structure after downloading."
        )
        raise RuntimeError(message)

    # The single label file is the filled version of the contour labels.
    return raw_paths, [_fill_labels(root)]
Get paths to the Neurosphere data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_neurosphere_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Build the Neurosphere dataset for 3D cell instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, segmentation_paths = get_neurosphere_paths(path, download)

    # Both keys are None: images and labels are passed as plain file paths.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=segmentation_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs,
    )
    return dataset
Get the Neurosphere dataset for 3D cell instance segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
def get_neurosphere_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Build the Neurosphere dataloader for 3D cell instance segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    neurosphere_ds = get_neurosphere_dataset(path, patch_shape, download, **dataset_kwargs)
    loader = torch_em.get_data_loader(neurosphere_ds, batch_size, **dataloader_kwargs)
    return loader
Get the Neurosphere dataloader for 3D cell instance segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.