torch_em.data.datasets.light_microscopy.dynamicnuclearnet
The DynamicNuclearNet dataset contains annotations for nucleus segmentation and tracking in fluorescence light microscopy, for five different cell lines.
This dataset is from the publication https://doi.org/10.1101/803205. Please cite it if you use this dataset for your research.
This dataset cannot be downloaded automatically. Please visit https://datasets.deepcell.org/data and download it yourself.
"""The DynamicNuclearNet dataset contains annotations for nucleus segmentation
and tracking in fluorescence light microscopy, for five different cell lines.

This dataset is from the publication https://doi.org/10.1101/803205.
Please cite it if you use this dataset for your research.

This dataset cannot be downloaded automatically, please visit https://datasets.deepcell.org/data
and download it yourself.
"""

import os
from tqdm import tqdm
from glob import glob
from typing import Tuple, Union, Literal, List

import numpy as np
import pandas as pd

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


def _create_split(path, split):
    """Convert the npz archive of one split into one zarr container per image.

    Reads `<path>/DynamicNuclearNet-segmentation-v1_0/<split>.npz`, writes the first
    channel of every image ("raw") and of every label ("labels") to
    `<path>/<split>/image_XXXX.zarr` and finally deletes the npz file.

    Args:
        path: Root folder of the dataset.
        split: Name of the split to convert.
    """
    import z5py

    split_file = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0", f"{split}.npz")
    split_folder = os.path.join(path, split)
    os.makedirs(split_folder, exist_ok=True)

    # Load the arrays inside a context manager so that the file handle of the npz archive
    # is released before the archive is deleted below (deleting an open file fails on Windows).
    # The "meta" entry of the archive is not needed for creating the files and is not read.
    with np.load(split_file, allow_pickle=True) as data:
        x, y = data["X"], data["y"]

    for i, (im, label) in tqdm(enumerate(zip(x, y)), total=len(x), desc=f"Creating files for {split}-split"):
        out_path = os.path.join(split_folder, f"image_{i:04}.zarr")
        # Only the first entry of the last axis is stored.
        image_channel = im[..., 0]
        label_channel = label[..., 0]
        # The chunk shape equals the image shape, i.e. each dataset is written as a single chunk.
        chunks = image_channel.shape
        with z5py.File(out_path, "a") as f:
            f.create_dataset("raw", data=image_channel, compression="gzip", chunks=chunks)
            f.create_dataset("labels", data=label_channel, compression="gzip", chunks=chunks)

    os.remove(split_file)


def _create_dataset(path, zip_path):
    """Unpack the dataset zip and convert all three splits to zarr files.

    Args:
        path: Root folder of the dataset, the zip is extracted here.
        zip_path: Filepath of the manually downloaded zip archive.
    """
    # NOTE(review): presumably `remove=False` keeps the zip archive after extraction; confirm in `util.unzip`.
    util.unzip(zip_path, path, remove=False)
    splits = ["train", "val", "test"]
    assert all(
        [os.path.exists(os.path.join(path, "DynamicNuclearNet-segmentation-v1_0", f"{split}.npz")) for split in splits]
    ), f"The extracted archive at {path} does not contain the npz files for all of the splits {splits}."
    for split in splits:
        _create_split(path, split)


def get_dynamicnuclearnet_data(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False,
) -> str:
    """Download the DynamicNuclearNet dataset.

    NOTE: Automatic download is not supported for DynamicNuclearNet dataset.
    Please download the dataset from https://datasets.deepcell.org/data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
            This argument is currently not used, because automatic download is not supported.

    Returns:
        The path where inputs are stored per split.

    Raises:
        RuntimeError: If neither the converted split folders nor the zip archive are found in `path`.
    """
    splits = ["train", "val", "test"]
    assert split in splits, f"'{split}' is not a valid split."

    # check if the dataset exists already
    zip_path = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0.zip")
    if all([os.path.exists(os.path.join(path, split)) for split in splits]):  # yes it does
        pass
    elif os.path.exists(zip_path):  # no it does not, but we have the zip there and can unpack it
        _create_dataset(path, zip_path)
    else:
        raise RuntimeError(
            "We do not support automatic download for the dynamic nuclear net dataset yet. "
            f"Please download the dataset from https://datasets.deepcell.org/data and put it here: {zip_path}"
        )

    split_folder = os.path.join(path, split)
    return split_folder


def get_dynamicnuclearnet_paths(path: Union[os.PathLike, str], split: str, download: bool = False) -> List[str]:
    """Get paths to the DynamicNuclearNet data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the stored data, in sorted order.
    """
    split_folder = get_dynamicnuclearnet_data(path, split, download)
    assert os.path.exists(split_folder), f"The split folder {split_folder} does not exist."
    # glob returns the files in arbitrary order; sort them so that the order of the samples is reproducible.
    data_paths = sorted(glob(os.path.join(split_folder, "*.zarr")))
    assert len(data_paths) > 0, f"Could not find any zarr files in {split_folder}."

    return data_paths


def get_dynamicnuclearnet_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the DynamicNuclearNet dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    data_paths = get_dynamicnuclearnet_paths(path, split, download)

    # Raw data and labels are stored in the same zarr files, under the keys "raw" and "labels".
    return torch_em.default_segmentation_dataset(
        raw_paths=data_paths,
        raw_key="raw",
        label_paths=data_paths,
        label_key="labels",
        patch_shape=patch_shape,
        is_seg_dataset=True,
        ndim=2,
        **kwargs
    )


def get_dynamicnuclearnet_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the DynamicNuclearNet dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments for the dataset from the remaining ones, which go to the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_dynamicnuclearnet_dataset(path, patch_shape, split, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
def get_dynamicnuclearnet_data(
    path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool = False,
) -> str:
    """Make sure the DynamicNuclearNet data is available and return the folder of a split.

    NOTE: Automatic download is not supported for the DynamicNuclearNet dataset.
    Please download the dataset from https://datasets.deepcell.org/data.

    Args:
        path: Filepath to a folder where the data is stored.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
            This argument is not read by this function.

    Returns:
        The path where inputs are stored per split.

    Raises:
        RuntimeError: If neither the split folders nor the zip archive exist in `path`.
    """
    valid_splits = ["train", "val", "test"]
    assert split in valid_splits, f"'{split}' is not a valid split."

    zip_path = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0.zip")

    # The data counts as available once a folder exists for each of the three splits.
    is_unpacked = all(os.path.exists(os.path.join(path, name)) for name in valid_splits)
    if not is_unpacked:
        # Without the manually downloaded zip archive there is nothing to unpack.
        if not os.path.exists(zip_path):
            raise RuntimeError(
                "We do not support automatic download for the dynamic nuclear net dataset yet. "
                f"Please download the dataset from https://datasets.deepcell.org/data and put it here: {zip_path}"
            )
        _create_dataset(path, zip_path)

    return os.path.join(path, split)
Download the DynamicNuclearNet dataset.
NOTE: Automatic download is not supported for the DynamicNuclearNet dataset. Please download the dataset from https://datasets.deepcell.org/data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split to use for the dataset. Either 'train', 'val' or 'test'.
- download: Whether to download the data if it is not present. Currently has no effect, because automatic download is not supported for this dataset.
Returns:
The path where inputs are stored per split.
def get_dynamicnuclearnet_paths(path: Union[os.PathLike, str], split: str, download: bool = False) -> List[str]:
    """Get paths to the DynamicNuclearNet data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the stored data, in sorted order.
    """
    split_folder = get_dynamicnuclearnet_data(path, split, download)
    assert os.path.exists(split_folder), f"The split folder {split_folder} does not exist."
    # glob returns the files in arbitrary order; sort them so that the order of the samples is reproducible.
    data_paths = sorted(glob(os.path.join(split_folder, "*.zarr")))
    assert len(data_paths) > 0, f"Could not find any zarr files in {split_folder}."

    return data_paths
Get paths to the DynamicNuclearNet data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split to use for the dataset. Either 'train', 'val' or 'test'.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the stored data.
def get_dynamicnuclearnet_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the segmentation dataset for one split of DynamicNuclearNet.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    zarr_files = get_dynamicnuclearnet_paths(path, split, download)

    # Each zarr file holds both the image ("raw") and its annotation ("labels"),
    # hence the same list of files is passed for the raw data and for the labels.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=zarr_files, raw_key="raw",
        label_paths=zarr_files, label_key="labels",
        patch_shape=patch_shape, is_seg_dataset=True, ndim=2,
        **kwargs
    )
    return dataset
Get the DynamicNuclearNet dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- split: The split to use for the dataset. Either 'train', 'val' or 'test'.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_dynamicnuclearnet_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val', 'test'],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the dataloader for one split of DynamicNuclearNet.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The split to use for the dataset. Either 'train', 'val' or 'test'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # The first group of keyword arguments is forwarded to the dataset, the second one to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    segmentation_dataset = get_dynamicnuclearnet_dataset(
        path, patch_shape, split, download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(segmentation_dataset, batch_size, **dataloader_kwargs)
    return loader
Get the DynamicNuclearNet dataloader for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The split to use for the dataset. Either 'train', 'val' or 'test'.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.