torch_em.data.datasets.light_microscopy.dynamicnuclearnet
The DynamicNuclearNet dataset contains annotations for nucleus segmentation and tracking in fluorescence light microscopy, for five different cell lines.
This dataset is from the publication https://doi.org/10.1101/803205. Please cite it if you use this dataset for your research.
This dataset cannot be downloaded automatically. Please visit https://datasets.deepcell.org/data and download it yourself.
"""The DynamicNuclearNet dataset contains annotations for nucleus segmentation
and tracking in fluorescence light microscopy, for five different cell lines.

This dataset is from the publication https://doi.org/10.1101/803205.
Please cite it if you use this dataset for your research.

This dataset cannot be downloaded automatically. Please visit https://datasets.deepcell.org/data
and download it yourself.
"""

import os
from tqdm import tqdm
from glob import glob
from typing import Tuple, Union

import z5py
import numpy as np
import pandas as pd

import torch_em
from torch.utils.data import Dataset, DataLoader

from .. import util


def _create_split(path, split):
    """Convert the npz file of one split into one zarr file per image.

    Reads `DynamicNuclearNet-segmentation-v1_0/{split}.npz` inside `path`, writes the first
    channel of every image and of every label to `{path}/{split}/image_XXXX.zarr`
    (datasets 'raw' and 'labels') and removes the npz file afterwards.

    Args:
        path: The folder that contains the unpacked archive.
        split: The name of the split to convert.
    """
    split_file = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0", f"{split}.npz")
    split_folder = os.path.join(path, split)
    os.makedirs(split_folder, exist_ok=True)

    # Load inside a context manager so that the archive is closed before it is removed below.
    # NOTE(review): allow_pickle=True unpickles content of the downloaded file; only use archives
    # from a trusted source.
    with np.load(split_file, allow_pickle=True) as data:
        x, y = data["X"], data["y"]

    for i, (im, label) in tqdm(enumerate(zip(x, y)), total=len(x), desc=f"Creating files for {split}-split"):
        out_path = os.path.join(split_folder, f"image_{i:04}.zarr")
        image_channel = im[..., 0]
        label_channel = label[..., 0]
        # Each image is stored as a single chunk.
        chunks = image_channel.shape
        with z5py.File(out_path, "a") as f:
            f.create_dataset("raw", data=image_channel, compression="gzip", chunks=chunks)
            f.create_dataset("labels", data=label_channel, compression="gzip", chunks=chunks)

    os.remove(split_file)


def _create_dataset(path, zip_path):
    """Unpack the archive at `zip_path` into `path` and convert all three splits.

    Args:
        path: The folder to unpack the archive into.
        zip_path: The filepath of the manually downloaded zip archive.

    Raises:
        RuntimeError: If the unpacked archive does not contain the npz file of every split.
    """
    util.unzip(zip_path, path, remove=False)
    splits = ["train", "val", "test"]

    # Explicit check instead of an assert, so that it is not stripped when running with -O.
    split_files = [os.path.join(path, "DynamicNuclearNet-segmentation-v1_0", f"{split}.npz") for split in splits]
    missing = [split_file for split_file in split_files if not os.path.exists(split_file)]
    if missing:
        raise RuntimeError(f"The following files are missing after unpacking {zip_path}: {missing}")

    for split in splits:
        _create_split(path, split)


def get_dynamicnuclearnet_dataset(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the DynamicNuclearNet dataset for nucleus segmentation.

    The data is expected in `path` either as the already converted split folders
    ('train', 'val' and 'test') or as the manually downloaded archive
    `DynamicNuclearNet-segmentation-v1_0.zip`, which is then unpacked and converted.

    Args:
        path: Filepath to a folder where the data is stored.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        patch_shape: The patch shape to use for training.
        download: Currently unused. Automatic download is not supported for this dataset.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        ValueError: If `split` is not one of 'train', 'val' or 'test'.
        RuntimeError: If neither the converted data nor the zip archive is found in `path`,
            or if the folder of the requested split does not contain any zarr files.
    """
    splits = ("train", "val", "test")
    if split not in splits:
        raise ValueError(f"Invalid split '{split}', expected one of {splits}.")

    # Check if the dataset exists already; otherwise create it from the zip archive.
    zip_path = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0.zip")
    have_all_splits = all(os.path.exists(os.path.join(path, name)) for name in splits)
    if not have_all_splits:
        if not os.path.exists(zip_path):
            raise RuntimeError(
                "We do not support automatic download for the dynamic nuclear net dataset yet. "
                f"Please download the dataset from https://datasets.deepcell.org/data and put it here: {zip_path}"
            )
        _create_dataset(path, zip_path)

    split_folder = os.path.join(path, split)
    # glob returns files in arbitrary order; sort them so that the dataset order is reproducible.
    data_paths = sorted(glob(os.path.join(split_folder, "*.zarr")))
    if not data_paths:
        raise RuntimeError(f"Could not find any zarr files in {split_folder}.")

    raw_key, label_key = "raw", "labels"

    return torch_em.default_segmentation_dataset(
        data_paths, raw_key, data_paths, label_key, patch_shape, is_seg_dataset=True, ndim=2, **kwargs
    )


def get_dynamicnuclearnet_loader(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the DynamicNuclearNet dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the data is stored.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Currently unused. Automatic download is not supported for this dataset.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the ones for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_dynamicnuclearnet_dataset(path, split, patch_shape, download, **ds_kwargs)
    loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
    return loader
def get_dynamicnuclearnet_dataset(path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_dynamicnuclearnet_dataset(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the DynamicNuclearNet dataset for nucleus segmentation.

    The data is expected in `path` either as the already converted split folders
    ('train', 'val' and 'test') or as the manually downloaded archive
    `DynamicNuclearNet-segmentation-v1_0.zip`, which is then unpacked and converted.

    Args:
        path: Filepath to a folder where the data is stored.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        patch_shape: The patch shape to use for training.
        download: Currently unused. Automatic download is not supported for this dataset.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        ValueError: If `split` is not one of 'train', 'val' or 'test'.
        RuntimeError: If neither the converted data nor the zip archive is found in `path`,
            or if the folder of the requested split does not contain any zarr files.
    """
    splits = ("train", "val", "test")
    # Explicit check instead of an assert, so that it is not stripped when running with -O.
    if split not in splits:
        raise ValueError(f"Invalid split '{split}', expected one of {splits}.")

    # Check if the dataset exists already; otherwise create it from the zip archive.
    zip_path = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0.zip")
    have_all_splits = all(os.path.exists(os.path.join(path, name)) for name in splits)
    if not have_all_splits:
        if not os.path.exists(zip_path):
            raise RuntimeError(
                "We do not support automatic download for the dynamic nuclear net dataset yet. "
                f"Please download the dataset from https://datasets.deepcell.org/data and put it here: {zip_path}"
            )
        _create_dataset(path, zip_path)

    split_folder = os.path.join(path, split)
    # glob returns files in arbitrary order; sort them so that the dataset order is reproducible.
    data_paths = sorted(glob(os.path.join(split_folder, "*.zarr")))
    if not data_paths:
        raise RuntimeError(f"Could not find any zarr files in {split_folder}.")

    raw_key, label_key = "raw", "labels"

    return torch_em.default_segmentation_dataset(
        data_paths, raw_key, data_paths, label_key, patch_shape, is_seg_dataset=True, ndim=2, **kwargs
    )
Get the DynamicNuclearNet dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split to use for the dataset. Either 'train', 'val' or 'test'.
- patch_shape: The patch shape to use for training.
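- download: Whether to download the data if it is not present. Note that automatic download is not supported for this dataset; the archive has to be downloaded manually.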
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_dynamicnuclearnet_loader(path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], batch_size: int, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_dynamicnuclearnet_loader(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the DynamicNuclearNet dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Forwarded unchanged to `get_dynamicnuclearnet_dataset`.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the ones for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    segmentation_dataset = get_dynamicnuclearnet_dataset(
        path=path, split=split, patch_shape=patch_shape, download=download, **dataset_kwargs
    )
    return torch_em.get_data_loader(segmentation_dataset, batch_size, **dataloader_kwargs)
Get the DynamicNuclearNet dataloader for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split to use for the dataset. Either 'train', 'val' or 'test'.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
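- download: Whether to download the data if it is not present. Note that automatic download is not supported for this dataset; the archive has to be downloaded manually.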
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.