torch_em.data.datasets.light_microscopy.dynamicnuclearnet

The DynamicNuclearNet dataset contains annotations for nucleus segmentation and tracking in fluorescence light microscopy, for five different cell lines.

This dataset is from the publication https://doi.org/10.1101/803205. Please cite it if you use this dataset for your research.

This dataset cannot be downloaded automatically. Please visit https://datasets.deepcell.org/data and download it manually.

  1"""The DynamicNuclearNet dataset contains annotations for nucleus segmentation
  2and tracking in fluorescence light microscopy, for five different cell lines.
  3
  4This dataset is from the publication https://doi.org/10.1101/803205.
  5Please cite it if you use this dataset for your research.
  6
  7This dataset cannot be downloaded automatically, please visit https://datasets.deepcell.org/data
  8and download it yourself.
  9"""
 10
 11import os
 12from tqdm import tqdm
 13from glob import glob
 14from typing import Tuple, Union
 15
 16import z5py
 17import numpy as np
 18import pandas as pd
 19
 20import torch_em
 21from torch.utils.data import Dataset, DataLoader
 22
 23from .. import util
 24
 25
 26def _create_split(path, split):
 27    split_file = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0", f"{split}.npz")
 28    split_folder = os.path.join(path, split)
 29    os.makedirs(split_folder, exist_ok=True)
 30    data = np.load(split_file, allow_pickle=True)
 31
 32    x, y = data["X"], data["y"]
 33    metadata = data["meta"]
 34    metadata = pd.DataFrame(metadata[1:], columns=metadata[0])
 35
 36    for i, (im, label) in tqdm(enumerate(zip(x, y)), total=len(x), desc=f"Creating files for {split}-split"):
 37        out_path = os.path.join(split_folder, f"image_{i:04}.zarr")
 38        image_channel = im[..., 0]
 39        label_channel = label[..., 0]
 40        chunks = image_channel.shape
 41        with z5py.File(out_path, "a") as f:
 42            f.create_dataset("raw", data=image_channel, compression="gzip", chunks=chunks)
 43            f.create_dataset("labels", data=label_channel, compression="gzip", chunks=chunks)
 44
 45    os.remove(split_file)
 46
 47
 48def _create_dataset(path, zip_path):
 49    util.unzip(zip_path, path, remove=False)
 50    splits = ["train", "val", "test"]
 51    assert all(
 52        [os.path.exists(os.path.join(path, "DynamicNuclearNet-segmentation-v1_0", f"{split}.npz")) for split in splits]
 53    )
 54    for split in splits:
 55        _create_split(path, split)
 56
 57
 58def get_dynamicnuclearnet_dataset(
 59    path: Union[os.PathLike, str],
 60    split: str,
 61    patch_shape: Tuple[int, int],
 62    download: bool = False,
 63    **kwargs
 64) -> Dataset:
 65    """Get the DynamicNuclearNet dataset for nucleus segmentation.
 66
 67    Args:
 68        path: Filepath to a folder where the downloaded data will be saved.
 69        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
 70        patch_shape: The patch shape to use for training.
 71        download: Whether to download the data if it is not present.
 72        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 73
 74    Returns:
 75       The segmentation dataset.
 76    """
 77    splits = ["train", "val", "test"]
 78    assert split in splits
 79
 80    # check if the dataset exists already
 81    zip_path = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0.zip")
 82    if all([os.path.exists(os.path.join(path, split)) for split in splits]):  # yes it does
 83        pass
 84    elif os.path.exists(zip_path):  # no it does not, but we have the zip there and can unpack it
 85        _create_dataset(path, zip_path)
 86    else:
 87        raise RuntimeError(
 88            "We do not support automatic download for the dynamic nuclear net dataset yet. "
 89            f"Please download the dataset from https://datasets.deepcell.org/data and put it here: {zip_path}"
 90        )
 91
 92    split_folder = os.path.join(path, split)
 93    assert os.path.exists(split_folder)
 94    data_path = glob(os.path.join(split_folder, "*.zarr"))
 95    assert len(data_path) > 0
 96
 97    raw_key, label_key = "raw", "labels"
 98
 99    return torch_em.default_segmentation_dataset(
100        data_path, raw_key, data_path, label_key, patch_shape, is_seg_dataset=True, ndim=2, **kwargs
101    )
102
103
def get_dynamicnuclearnet_loader(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the DynamicNuclearNet dataloader for nucleus segmentation.

    The keyword arguments are divided with `util.split_kwargs`: one part is passed to
    `get_dynamicnuclearnet_dataset`, the rest to `torch_em.get_data_loader`.

    Args:
        path: Filepath to the folder with the data; forwarded to `get_dynamicnuclearnet_dataset`.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Forwarded to `get_dynamicnuclearnet_dataset`.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    return torch_em.get_data_loader(
        get_dynamicnuclearnet_dataset(path, split, patch_shape, download=download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs
    )
def get_dynamicnuclearnet_dataset( path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 59def get_dynamicnuclearnet_dataset(
 60    path: Union[os.PathLike, str],
 61    split: str,
 62    patch_shape: Tuple[int, int],
 63    download: bool = False,
 64    **kwargs
 65) -> Dataset:
 66    """Get the DynamicNuclearNet dataset for nucleus segmentation.
 67
 68    Args:
 69        path: Filepath to a folder where the downloaded data will be saved.
 70        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
 71        patch_shape: The patch shape to use for training.
 72        download: Whether to download the data if it is not present.
 73        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 74
 75    Returns:
 76       The segmentation dataset.
 77    """
 78    splits = ["train", "val", "test"]
 79    assert split in splits
 80
 81    # check if the dataset exists already
 82    zip_path = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0.zip")
 83    if all([os.path.exists(os.path.join(path, split)) for split in splits]):  # yes it does
 84        pass
 85    elif os.path.exists(zip_path):  # no it does not, but we have the zip there and can unpack it
 86        _create_dataset(path, zip_path)
 87    else:
 88        raise RuntimeError(
 89            "We do not support automatic download for the dynamic nuclear net dataset yet. "
 90            f"Please download the dataset from https://datasets.deepcell.org/data and put it here: {zip_path}"
 91        )
 92
 93    split_folder = os.path.join(path, split)
 94    assert os.path.exists(split_folder)
 95    data_path = glob(os.path.join(split_folder, "*.zarr"))
 96    assert len(data_path) > 0
 97
 98    raw_key, label_key = "raw", "labels"
 99
100    return torch_em.default_segmentation_dataset(
101        data_path, raw_key, data_path, label_key, patch_shape, is_seg_dataset=True, ndim=2, **kwargs
102    )

Get the DynamicNuclearNet dataset for nucleus segmentation.

Arguments:
  • path: Filepath to the folder that contains the manually downloaded zip file; the extracted data is saved there as well.
  • split: The split to use for the dataset. Either 'train', 'val' or 'test'.
  • patch_shape: The patch shape to use for training.
  • download: Currently ignored; automatic download is not supported, so the data has to be downloaded manually.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_dynamicnuclearnet_loader( path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], batch_size: int, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_dynamicnuclearnet_loader(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the DynamicNuclearNet dataloader for nucleus segmentation.

    The keyword arguments are divided with `util.split_kwargs`: one part is passed to
    `get_dynamicnuclearnet_dataset`, the rest to `torch_em.get_data_loader`.

    Args:
        path: Filepath to the folder with the data; forwarded to `get_dynamicnuclearnet_dataset`.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Forwarded to `get_dynamicnuclearnet_dataset`.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    return torch_em.get_data_loader(
        get_dynamicnuclearnet_dataset(path, split, patch_shape, download=download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs
    )

Get the DynamicNuclearNet dataloader for nucleus segmentation.

Arguments:
  • path: Filepath to the folder that contains the manually downloaded zip file; the extracted data is saved there as well.
  • split: The split to use for the dataset. Either 'train', 'val' or 'test'.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • download: Currently ignored; automatic download is not supported, so the data has to be downloaded manually.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.