torch_em.data.datasets.light_microscopy.dynamicnuclearnet

The DynamicNuclearNet dataset contains annotations for nucleus segmentation and tracking in fluorescence light microscopy, for five different cell lines.

This dataset is from the publication https://doi.org/10.1101/803205. Please cite it if you use this dataset for your research.

This dataset cannot be downloaded automatically; please visit https://datasets.deepcell.org/data and download it yourself.

View Source

  1"""The DynamicNuclearNet dataset contains annotations for nucleus segmentation
  2and tracking in fluorescence light microscopy, for five different cell lines.
  3
  4This dataset is from the publication https://doi.org/10.1101/803205.
  5Please cite it if you use this dataset for your research.
  6
  7This dataset cannot be downloaded automatically, please visit https://datasets.deepcell.org/data
  8and download it yourself.
  9"""
 10
 11import os
 12from tqdm import tqdm
 13from glob import glob
 14from typing import Tuple, Union, Literal, List
 15
 16import numpy as np
 17import pandas as pd
 18
 19from torch.utils.data import Dataset, DataLoader
 20
 21import torch_em
 22
 23from .. import util
 24
 25
 26def _create_split(path, split):
 27    import z5py
 28
 29    split_file = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0", f"{split}.npz")
 30    split_folder = os.path.join(path, split)
 31    os.makedirs(split_folder, exist_ok=True)
 32    data = np.load(split_file, allow_pickle=True)
 33
 34    x, y = data["X"], data["y"]
 35    metadata = data["meta"]
 36    metadata = pd.DataFrame(metadata[1:], columns=metadata[0])
 37
 38    for i, (im, label) in tqdm(enumerate(zip(x, y)), total=len(x), desc=f"Creating files for {split}-split"):
 39        out_path = os.path.join(split_folder, f"image_{i:04}.zarr")
 40        image_channel = im[..., 0]
 41        label_channel = label[..., 0]
 42        chunks = image_channel.shape
 43        with z5py.File(out_path, "a") as f:
 44            f.create_dataset("raw", data=image_channel, compression="gzip", chunks=chunks)
 45            f.create_dataset("labels", data=label_channel, compression="gzip", chunks=chunks)
 46
 47    os.remove(split_file)
 48
 49
 50def _create_dataset(path, zip_path):
 51    util.unzip(zip_path, path, remove=False)
 52    splits = ["train", "val", "test"]
 53    assert all(
 54        [os.path.exists(os.path.join(path, "DynamicNuclearNet-segmentation-v1_0", f"{split}.npz")) for split in splits]
 55    )
 56    for split in splits:
 57        _create_split(path, split)
 58
 59
 60def get_dynamicnuclearnet_data(
 61    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False,
 62) -> str:
 63    """Download the DynamicNuclearNet dataset.
 64
 65    NOTE: Automatic download is not supported for DynamicNuclearnet dataset.
 66    Please download the dataset from https://datasets.deepcell.org/data.
 67
 68    Args:
 69        path: Filepath to a folder where the downloaded data will be saved.
 70        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
 71        download: Whether to download the data if it is not present.
 72
 73    Returns:
 74        The path where inputs are stored per split.
 75    """
 76    splits = ["train", "val", "test"]
 77    assert split in splits, f"'{split}' is not a valid split."
 78
 79    # check if the dataset exists already
 80    zip_path = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0.zip")
 81    if all([os.path.exists(os.path.join(path, split)) for split in splits]):  # yes it does
 82        pass
 83    elif os.path.exists(zip_path):  # no it does not, but we have the zip there and can unpack it
 84        _create_dataset(path, zip_path)
 85    else:
 86        raise RuntimeError(
 87            "We do not support automatic download for the dynamic nuclear net dataset yet. "
 88            f"Please download the dataset from https://datasets.deepcell.org/data and put it here: {zip_path}"
 89        )
 90
 91    split_folder = os.path.join(path, split)
 92    return split_folder
 93
 94
 95def get_dynamicnuclearnet_paths(path: Union[os.PathLike, str], split: str, download: bool = False) -> List[str]:
 96    """Get paths to the DynamicNuclearNet data.
 97
 98    Args:
 99        path: Filepath to a folder where the downloaded data will be saved.
100        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
101        download: Whether to download the data if it is not present.
102
103    Returns:
104        List of filepaths for the stored data.
105    """
106    split_folder = get_dynamicnuclearnet_data(path, split, download)
107    assert os.path.exists(split_folder)
108    data_paths = glob(os.path.join(split_folder, "*.zarr"))
109    assert len(data_paths) > 0
110
111    return data_paths
112
113
def get_dynamicnuclearnet_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the DynamicNuclearNet dataset for nucleus segmentation.

    Args:
        path: Filepath to the folder that holds the downloaded zip archive and the extracted splits.
        patch_shape: The patch shape to use for training.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        download: Forwarded to `get_dynamicnuclearnet_paths`.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    zarr_paths = get_dynamicnuclearnet_paths(path, split, download)

    # Images and annotations are read from the same files, under the keys "raw" and "labels".
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=zarr_paths,
        raw_key="raw",
        label_paths=zarr_paths,
        label_key="labels",
        patch_shape=patch_shape,
        is_seg_dataset=True,
        ndim=2,
        **kwargs
    )
    return dataset
145
146
def get_dynamicnuclearnet_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the DynamicNuclearNet dataloader for nucleus segmentation.

    Args:
        path: Filepath to the folder that holds the downloaded zip archive and the extracted splits.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        download: Forwarded to `get_dynamicnuclearnet_dataset`.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the ones passed on to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    segmentation_dataset = get_dynamicnuclearnet_dataset(path, patch_shape, split, download, **dataset_kwargs)
    loader = torch_em.get_data_loader(segmentation_dataset, batch_size, **dataloader_kwargs)
    return loader

def get_dynamicnuclearnet_data( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False) -> str: View Source

def get_dynamicnuclearnet_data(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False,
) -> str:
    """Prepare the DynamicNuclearNet dataset and return the folder of the requested split.

    NOTE: Automatic download is not supported for the DynamicNuclearNet dataset.
    Please download the dataset from https://datasets.deepcell.org/data.

    Args:
        path: Filepath to the folder that holds the downloaded zip archive and the extracted splits.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        download: Accepted for a uniform interface; it is not used by this function.

    Returns:
        The path where inputs are stored per split.

    Raises:
        RuntimeError: If neither the extracted split folders nor the zip archive are found in `path`.
    """
    valid_splits = ("train", "val", "test")
    assert split in valid_splits, f"'{split}' is not a valid split."

    zip_path = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0.zip")

    # The data counts as prepared once a folder exists for each of the three splits.
    is_prepared = all(os.path.exists(os.path.join(path, name)) for name in valid_splits)
    if not is_prepared:
        if not os.path.exists(zip_path):
            raise RuntimeError(
                "We do not support automatic download for the dynamic nuclear net dataset yet. "
                f"Please download the dataset from https://datasets.deepcell.org/data and put it here: {zip_path}"
            )
        # The zip archive is available, so it can be unpacked and converted.
        _create_dataset(path, zip_path)

    return os.path.join(path, split)

Download the DynamicNuclearNet dataset.

NOTE: Automatic download is not supported for the DynamicNuclearNet dataset. Please download the dataset from https://datasets.deepcell.org/data.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
split: The split to use for the dataset. Either 'train', 'val' or 'test'.
download: Whether to download the data if it is not present.

Returns:

The path where inputs are stored per split.

def get_dynamicnuclearnet_paths( path: Union[os.PathLike, str], split: str, download: bool = False) -> List[str]: View Source

 96def get_dynamicnuclearnet_paths(path: Union[os.PathLike, str], split: str, download: bool = False) -> List[str]:
 97    """Get paths to the DynamicNuclearNet data.
 98
 99    Args:
100        path: Filepath to a folder where the downloaded data will be saved.
101        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
102        download: Whether to download the data if it is not present.
103
104    Returns:
105        List of filepaths for the stored data.
106    """
107    split_folder = get_dynamicnuclearnet_data(path, split, download)
108    assert os.path.exists(split_folder)
109    data_paths = glob(os.path.join(split_folder, "*.zarr"))
110    assert len(data_paths) > 0
111
112    return data_paths

Get paths to the DynamicNuclearNet data.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
split: The split to use for the dataset. Either 'train', 'val' or 'test'.
download: Whether to download the data if it is not present.

Returns:

List of filepaths for the stored data.

def get_dynamicnuclearnet_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset: View Source

def get_dynamicnuclearnet_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the DynamicNuclearNet dataset for nucleus segmentation.

    Args:
        path: Filepath to the folder that holds the downloaded zip archive and the extracted splits.
        patch_shape: The patch shape to use for training.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        download: Forwarded to `get_dynamicnuclearnet_paths`.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    zarr_paths = get_dynamicnuclearnet_paths(path, split, download)

    # Images and annotations are read from the same files, under the keys "raw" and "labels".
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=zarr_paths,
        raw_key="raw",
        label_paths=zarr_paths,
        label_key="labels",
        patch_shape=patch_shape,
        is_seg_dataset=True,
        ndim=2,
        **kwargs
    )
    return dataset

Get the DynamicNuclearNet dataset for nucleus segmentation.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
patch_shape: The patch shape to use for training.
split: The split to use for the dataset. Either 'train', 'val' or 'test'.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.

Returns:

The segmentation dataset.

def get_dynamicnuclearnet_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'test'], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader: View Source

def get_dynamicnuclearnet_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the DynamicNuclearNet dataloader for nucleus segmentation.

    Args:
        path: Filepath to the folder that holds the downloaded zip archive and the extracted splits.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        download: Forwarded to `get_dynamicnuclearnet_dataset`.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the ones passed on to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    segmentation_dataset = get_dynamicnuclearnet_dataset(path, patch_shape, split, download, **dataset_kwargs)
    loader = torch_em.get_data_loader(segmentation_dataset, batch_size, **dataloader_kwargs)
    return loader

Get the DynamicNuclearNet dataloader for nucleus segmentation.

Arguments:

path: Filepath to a folder where the downloaded data will be saved.
batch_size: The batch size for training.
patch_shape: The patch shape to use for training.
split: The split to use for the dataset. Either 'train', 'val' or 'test'.
download: Whether to download the data if it is not present.
kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.

Returns:

The DataLoader.