torch_em.data.datasets.light_microscopy.neurosphere

The Neurosphere dataset contains a 3D fluorescence light-sheet microscopy image of a cancer cell neurosphere with ground truth instance segmentation from the OpenSegSPIM analysis pipeline.

The dataset consists of a single volume of approximately 115 x 150 x 150 voxels.

NOTE: The ground-truth segmentations are pixelated at the boundaries and don't exactly match the cell outlines in the image.

The dataset is located at https://sourceforge.net/projects/opensegspim/. This dataset is from the publication https://doi.org/10.1093/bioinformatics/btw093. Please cite it if you use this dataset in your research.

  1"""The Neurosphere dataset contains a 3D fluorescence light-sheet microscopy image
  2of a cancer cell neurosphere with ground truth instance segmentation from the
  3OpenSegSPIM analysis pipeline.
  4
  5The dataset consists of a single volume of approximately 115 x 150 x 150 voxels.
  6
  7NOTE: The ground-truth segmentations are pixelated at the boundaries and don't exactly match the cell outlines in the image.
  8
  9The dataset is located at https://sourceforge.net/projects/opensegspim/.
 10This dataset is from the publication https://doi.org/10.1093/bioinformatics/btw093.
 11Please cite it if you use this dataset in your research.
 12"""
 13
 14import os
 15from glob import glob
 16from natsort import natsorted
 17from typing import List, Tuple, Union
 18
 19import numpy as np
 20from scipy.ndimage import binary_fill_holes
 21
 22from torch.utils.data import Dataset, DataLoader
 23
 24import torch_em
 25
 26from .. import util
 27
 28
 29RAW_URL = "https://downloads.sourceforge.net/project/opensegspim/Sample%20Data/Neurosphere_Dataset.zip"
 30LABEL_URL = "https://downloads.sourceforge.net/project/opensegspim/Sample%20Data/Neurosphere_OpenSegSPIM.zip"
 31RAW_CHECKSUM = None
 32LABEL_CHECKSUM = None
 33
 34
 35def get_neurosphere_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 36    """Download the Neurosphere dataset.
 37
 38    Args:
 39        path: Filepath to a folder where the downloaded data will be saved.
 40        download: Whether to download the data if it is not present.
 41
 42    Returns:
 43        The filepath to the extracted data directory.
 44    """
 45    data_dir = os.path.join(path, "neurosphere")
 46    if os.path.exists(data_dir):
 47        return data_dir
 48
 49    os.makedirs(data_dir, exist_ok=True)
 50
 51    raw_zip = os.path.join(path, "Neurosphere_Dataset.zip")
 52    label_zip = os.path.join(path, "Neurosphere_OpenSegSPIM.zip")
 53
 54    util.download_source(raw_zip, RAW_URL, download, checksum=RAW_CHECKSUM)
 55    util.download_source(label_zip, LABEL_URL, download, checksum=LABEL_CHECKSUM)
 56
 57    util.unzip(raw_zip, data_dir)
 58    util.unzip(label_zip, data_dir)
 59
 60    return data_dir
 61
 62
 63def _fill_labels(data_dir: str) -> str:
 64    """Convert thin-shell contour labels to filled 3D instance segmentations.
 65
 66    Loads Nucleisegmented2.tif, applies binary_fill_holes per instance,
 67    renumbers to sequential IDs (1, 2, 3 ...), and saves as filled_labels.tif.
 68
 69    Args:
 70        data_dir: The neurosphere data directory.
 71
 72    Returns:
 73        Path to the filled label file.
 74    """
 75    import imageio.v3 as imageio
 76
 77    filled_path = os.path.join(data_dir, "filled_labels.tif")
 78    if os.path.exists(filled_path):
 79        return filled_path
 80
 81    label_paths = natsorted(glob(os.path.join(data_dir, "**", "Nucleisegmented2.tif"), recursive=True))
 82    if len(label_paths) == 0:
 83        raise RuntimeError(f"Label file 'Nucleisegmented2.tif' not found in {data_dir}.")
 84
 85    raw_labels = imageio.imread(label_paths[0])
 86    instance_ids = np.unique(raw_labels)
 87    instance_ids = instance_ids[instance_ids != 0]
 88
 89    filled = np.zeros(raw_labels.shape, dtype=np.int32)
 90    for new_id, val in enumerate(instance_ids, start=1):
 91        mask = binary_fill_holes(raw_labels == val)
 92        filled[mask] = new_id
 93
 94    imageio.imwrite(filled_path, filled)
 95    return filled_path
 96
 97
 98def get_neurosphere_paths(
 99    path: Union[os.PathLike, str], download: bool = False,
100) -> Tuple[List[str], List[str]]:
101    """Get paths to the Neurosphere data.
102
103    Args:
104        path: Filepath to a folder where the downloaded data will be saved.
105        download: Whether to download the data if it is not present.
106
107    Returns:
108        List of filepaths for the image data.
109        List of filepaths for the label data.
110    """
111    data_dir = get_neurosphere_data(path, download)
112
113    raw_paths = natsorted(glob(os.path.join(data_dir, "OriginalStack.tif")))
114    if len(raw_paths) == 0:
115        raise RuntimeError(
116            f"Raw image 'OriginalStack.tif' not found in {data_dir}. "
117            "Please check the dataset structure after downloading."
118        )
119
120    filled_label_path = _fill_labels(data_dir)
121    label_paths = [filled_label_path]
122
123    return raw_paths, label_paths
124
125
def get_neurosphere_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Get the Neurosphere dataset for 3D cell instance segmentation.

    Resolves the image and label files with `get_neurosphere_paths` and passes them
    to `torch_em.default_segmentation_dataset` with `raw_key` and `label_key` set
    to None.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_neurosphere_paths(path, download)

    # Arguments fixed by this dataset; everything else comes from the caller's kwargs.
    fixed_kwargs = dict(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
    )
    return torch_em.default_segmentation_dataset(**fixed_kwargs, **kwargs)
153
154
def get_neurosphere_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the Neurosphere dataloader for 3D cell instance segmentation.

    The extra keyword arguments are divided with `util.split_kwargs`: one part goes
    to `get_neurosphere_dataset`, the remainder to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_neurosphere_dataset(
        path=path, patch_shape=patch_shape, download=download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)
    return loader
# Download locations of the raw-data archive and of the OpenSegSPIM label archive on SourceForge.
RAW_URL = 'https://downloads.sourceforge.net/project/opensegspim/Sample%20Data/Neurosphere_Dataset.zip'
LABEL_URL = 'https://downloads.sourceforge.net/project/opensegspim/Sample%20Data/Neurosphere_OpenSegSPIM.zip'
# Values passed as `checksum=` when downloading the archives. No checksum is recorded
# (presumably None makes the download helper skip verification - TODO confirm).
RAW_CHECKSUM = None
LABEL_CHECKSUM = None
def get_neurosphere_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_neurosphere_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download and extract the Neurosphere dataset.

    The raw and label archives are fetched into `path` and both are unzipped into
    `<path>/neurosphere`. If that directory already exists and is not empty, it is
    returned without downloading anything.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the extracted data directory.
    """
    data_dir = os.path.join(path, "neurosphere")
    # An empty directory is what an interrupted or failed earlier attempt leaves
    # behind. Treat it as "not downloaded" instead of returning a folder without data.
    if os.path.isdir(data_dir) and os.listdir(data_dir):
        return data_dir

    # The archives are written directly into `path`, so it has to exist.
    os.makedirs(path, exist_ok=True)

    raw_zip = os.path.join(path, "Neurosphere_Dataset.zip")
    label_zip = os.path.join(path, "Neurosphere_OpenSegSPIM.zip")

    # Fetch both archives BEFORE creating the data directory: if a download fails
    # (or `download` is False and the archives are missing), no `data_dir` is left
    # behind that would make the next call return early.
    util.download_source(raw_zip, RAW_URL, download, checksum=RAW_CHECKSUM)
    util.download_source(label_zip, LABEL_URL, download, checksum=LABEL_CHECKSUM)

    os.makedirs(data_dir, exist_ok=True)
    util.unzip(raw_zip, data_dir)
    util.unzip(label_zip, data_dir)

    return data_dir

Download the Neurosphere dataset.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the extracted data directory.

def get_neurosphere_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
 99def get_neurosphere_paths(
100    path: Union[os.PathLike, str], download: bool = False,
101) -> Tuple[List[str], List[str]]:
102    """Get paths to the Neurosphere data.
103
104    Args:
105        path: Filepath to a folder where the downloaded data will be saved.
106        download: Whether to download the data if it is not present.
107
108    Returns:
109        List of filepaths for the image data.
110        List of filepaths for the label data.
111    """
112    data_dir = get_neurosphere_data(path, download)
113
114    raw_paths = natsorted(glob(os.path.join(data_dir, "OriginalStack.tif")))
115    if len(raw_paths) == 0:
116        raise RuntimeError(
117            f"Raw image 'OriginalStack.tif' not found in {data_dir}. "
118            "Please check the dataset structure after downloading."
119        )
120
121    filled_label_path = _fill_labels(data_dir)
122    label_paths = [filled_label_path]
123
124    return raw_paths, label_paths

Get paths to the Neurosphere data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_neurosphere_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_neurosphere_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Get the Neurosphere dataset for 3D cell instance segmentation.

    Resolves the image and label files with `get_neurosphere_paths` and passes them
    to `torch_em.default_segmentation_dataset` with `raw_key` and `label_key` set
    to None.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_neurosphere_paths(path, download)

    # Arguments fixed by this dataset; everything else comes from the caller's kwargs.
    fixed_kwargs = dict(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
    )
    return torch_em.default_segmentation_dataset(**fixed_kwargs, **kwargs)

Get the Neurosphere dataset for 3D cell instance segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_neurosphere_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, ...], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_neurosphere_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, ...],
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the Neurosphere dataloader for 3D cell instance segmentation.

    The extra keyword arguments are divided with `util.split_kwargs`: one part goes
    to `get_neurosphere_dataset`, the remainder to `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_neurosphere_dataset(
        path=path, patch_shape=patch_shape, download=download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)
    return loader

Get the Neurosphere dataloader for 3D cell instance segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.