torch_em.data.datasets.light_microscopy.plantseg
This dataset contains confocal and lightsheet microscopy images of plant cells with annotations for cell and nucleus segmentation.
The dataset is part of the publication https://doi.org/10.7554/eLife.57613. Please cite it if you use this dataset in your research.
"""This dataset contains confocal and lightsheet microscopy images of plant cells
with annotations for cell and nucleus segmentation.

The dataset is part of the publication https://doi.org/10.7554/eLife.57613.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from typing import List, Optional, Tuple, Union

import torch_em
from torch.utils.data import Dataset, DataLoader

from .. import util

# Download URLs for the OSF-hosted zip archives, organized by
# dataset name ('root', 'nuclei', 'ovules') and split.
URLS = {
    "root": {
        "train": "https://files.de-1.osf.io/v1/resources/9x3g2/providers/osfstorage/?zip=",
        "val": "https://files.de-1.osf.io/v1/resources/vs6gb/providers/osfstorage/?zip=",
        "test": "https://files.de-1.osf.io/v1/resources/tn4xj/providers/osfstorage/?zip=",
    },
    "nuclei": {
        "train": "https://files.de-1.osf.io/v1/resources/thxzn/providers/osfstorage/?zip=",
    },
    "ovules": {
        "train": "https://files.de-1.osf.io/v1/resources/x9yns/providers/osfstorage/?zip=",
        "val": "https://files.de-1.osf.io/v1/resources/xp5uf/providers/osfstorage/?zip=",
        "test": "https://files.de-1.osf.io/v1/resources/8jz7e/providers/osfstorage/?zip=",
    }
}

# FIXME: somehow the checksums are not reliable, this is a bit weird.
# The known values are kept below (commented out) for reference; all
# downloads currently skip checksum verification.
CHECKSUMS = {
    "root": {
        "train": None, "val": None, "test": None
        # "train": "f72e9525ff716ef14b70ab1318efd4bf303bbf9e0772bf2981a2db6e22a75794",
        # "val": "987280d9a56828c840e508422786431dcc3603e0ba4814aa06e7bf4424efcd9e",
        # "test": "ad71b8b9d20effba85fb5e1b42594ae35939d1a0cf905f3403789fc9e6afbc58",
    },
    "nuclei": {
        "train": None
        # "train": "9d19ddb61373e2a97effb6cf8bd8baae5f8a50f87024273070903ea8b1160396",
    },
    "ovules": {
        "train": None, "val": None, "test": None
        # "train": "70379673f1ab1866df6eb09d5ce11db7d3166d6d15b53a9c8b47376f04bae413",
        # "val": "872f516cb76879c30782d9a76d52df95236770a866f75365902c60c37b14fa36",
        # "test": "a7272f6ad1d765af6d121e20f436ac4f3609f1a90b1cb2346aa938d8c52800b9",
    }
}

# The resolution previously used for the resizing.
# I have removed this feature since it was not reliable,
# but leaving this here for reference
# (also implementing resizing would be a good idea,
# but more general and not for each dataset individually)
# NATIVE_RESOLUTION = (0.235, 0.075, 0.075)


def get_plantseg_data(path: Union[os.PathLike, str], download: bool, name: str, split: str) -> str:
    """Download the PlantSeg training data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.
        name: The name of the data to load. Either 'root', 'nuclei' or 'ovules'.
        split: The split to download. Either 'train', 'val' or 'test'.

    Returns:
        The filepath to the training data.
    """
    url = URLS[name][split]
    checksum = CHECKSUMS[name][split]
    os.makedirs(path, exist_ok=True)
    out_path = os.path.join(path, f"{name}_{split}")
    # Data was already downloaded and extracted; nothing to do.
    if os.path.exists(out_path):
        return out_path
    tmp_path = os.path.join(path, f"{name}_{split}.zip")
    util.download_source(tmp_path, url, download, checksum)
    # The zip archive is removed after extraction.
    util.unzip(tmp_path, out_path, remove=True)
    return out_path


def get_plantseg_dataset(
    path: Union[os.PathLike, str],
    name: str,
    split: str,
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs,
) -> Dataset:
    """Get the PlantSeg dataset for segmenting nuclei or cells.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the data to load. Either 'root', 'nuclei' or 'ovules'.
        split: The split to download. Either 'train', 'val' or 'test'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # The data is volumetric, so a 3D patch shape is required.
    assert len(patch_shape) == 3
    data_path = get_plantseg_data(path, download, name, split)

    # Sort for a deterministic file order across runs and platforms.
    file_paths = sorted(glob(os.path.join(data_path, "*.h5")))

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=binary, binary=binary, boundaries=boundaries,
        offsets=offsets, binary_is_exclusive=False
    )

    # Raw data and labels live in the same h5 files under these keys.
    raw_key, label_key = "raw", "label"
    return torch_em.default_segmentation_dataset(file_paths, raw_key, file_paths, label_key, patch_shape, **kwargs)


# TODO add support for ignore label, key: "/label_with_ignore"
def get_plantseg_loader(
    path: Union[os.PathLike, str],
    name: str,
    split: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the PlantSeg dataloader for segmenting nuclei or cells.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        name: The name of the data to load. Either 'root', 'nuclei' or 'ovules'.
        split: The split to download. Either 'train', 'val' or 'test'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Split kwargs into those consumed by the dataset vs. the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    dataset = get_plantseg_dataset(
        path, name, split, patch_shape,
        download=download, offsets=offsets, boundaries=boundaries, binary=binary,
        **ds_kwargs
    )
    loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
    return loader
URLS =
{'root': {'train': 'https://files.de-1.osf.io/v1/resources/9x3g2/providers/osfstorage/?zip=', 'val': 'https://files.de-1.osf.io/v1/resources/vs6gb/providers/osfstorage/?zip=', 'test': 'https://files.de-1.osf.io/v1/resources/tn4xj/providers/osfstorage/?zip='}, 'nuclei': {'train': 'https://files.de-1.osf.io/v1/resources/thxzn/providers/osfstorage/?zip='}, 'ovules': {'train': 'https://files.de-1.osf.io/v1/resources/x9yns/providers/osfstorage/?zip=', 'val': 'https://files.de-1.osf.io/v1/resources/xp5uf/providers/osfstorage/?zip=', 'test': 'https://files.de-1.osf.io/v1/resources/8jz7e/providers/osfstorage/?zip='}}
CHECKSUMS =
{'root': {'train': None, 'val': None, 'test': None}, 'nuclei': {'train': None}, 'ovules': {'train': None, 'val': None, 'test': None}}
def
get_plantseg_data( path: Union[os.PathLike, str], download: bool, name: str, split: str) -> str:
60def get_plantseg_data(path: Union[os.PathLike, str], download: bool, name: str, split: str) -> str: 61 """Download the PlantSeg training data. 62 63 Args: 64 path: Filepath to a folder where the downloaded data will be saved. 65 download: Whether to download the data if it is not present. 66 name: The name of the data to load. Either 'root', 'nuclei' or 'ovules'. 67 split: The split to download. Either 'train', 'val' or 'test'. 68 69 Returns: 70 The filepath to the training data. 71 """ 72 url = URLS[name][split] 73 checksum = CHECKSUMS[name][split] 74 os.makedirs(path, exist_ok=True) 75 out_path = os.path.join(path, f"{name}_{split}") 76 if os.path.exists(out_path): 77 return out_path 78 tmp_path = os.path.join(path, f"{name}_{split}.zip") 79 util.download_source(tmp_path, url, download, checksum) 80 util.unzip(tmp_path, out_path, remove=True) 81 return out_path
Download the PlantSeg training data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
- name: The name of the data to load. Either 'root', 'nuclei' or 'ovules'.
- split: The split to download. Either 'train', 'val' or 'test'.
Returns:
The filepath to the training data.
def
get_plantseg_dataset( path: Union[os.PathLike, str], name: str, split: str, patch_shape: Tuple[int, int, int], download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
84def get_plantseg_dataset( 85 path: Union[os.PathLike, str], 86 name: str, 87 split: str, 88 patch_shape: Tuple[int, int, int], 89 download: bool = False, 90 offsets: Optional[List[List[int]]] = None, 91 boundaries: bool = False, 92 binary: bool = False, 93 **kwargs, 94) -> Dataset: 95 """Get the PlantSeg dataset for segmenting nuclei or cells. 96 97 Args: 98 path: Filepath to a folder where the downloaded data will be saved. 99 name: The name of the data to load. Either 'root', 'nuclei' or 'ovules'. 100 split: The split to download. Either 'train', 'val' or 'test'. 101 patch_shape: The patch shape to use for training. 102 download: Whether to download the data if it is not present. 103 offsets: Offset values for affinity computation used as target. 104 boundaries: Whether to compute boundaries as the target. 105 binary: Whether to use a binary segmentation target. 106 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 107 108 Returns: 109 The segmentation dataset. 110 """ 111 assert len(patch_shape) == 3 112 data_path = get_plantseg_data(path, download, name, split) 113 114 file_paths = glob(os.path.join(data_path, "*.h5")) 115 file_paths.sort() 116 117 kwargs, _ = util.add_instance_label_transform( 118 kwargs, add_binary_target=binary, binary=binary, boundaries=boundaries, 119 offsets=offsets, binary_is_exclusive=False 120 ) 121 122 raw_key, label_key = "raw", "label" 123 return torch_em.default_segmentation_dataset(file_paths, raw_key, file_paths, label_key, patch_shape, **kwargs)
Get the PlantSeg dataset for segmenting nuclei or cells.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- name: The name of the data to load. Either 'root', 'nuclei' or 'ovules'.
- split: The split to download. Either 'train', 'val' or 'test'.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_plantseg_loader( path: Union[os.PathLike, str], name: str, split: str, patch_shape: Tuple[int, int, int], batch_size: int, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
127def get_plantseg_loader( 128 path: Union[os.PathLike, str], 129 name: str, 130 split: str, 131 patch_shape: Tuple[int, int, int], 132 batch_size: int, 133 download: bool = False, 134 offsets: Optional[List[List[int]]] = None, 135 boundaries: bool = False, 136 binary: bool = False, 137 **kwargs, 138) -> DataLoader: 139 """Get the PlantSeg dataloader for segmenting nuclei or cells. 140 141 Args: 142 path: Filepath to a folder where the downloaded data will be saved. 143 name: The name of the data to load. Either 'root', 'nuclei' or 'ovules'. 144 split: The split to download. Either 'train', 'val' or 'test'. 145 patch_shape: The patch shape to use for training. 146 batch_size: The batch size for training. 147 download: Whether to download the data if it is not present. 148 offsets: Offset values for affinity computation used as target. 149 boundaries: Whether to compute boundaries as the target. 150 binary: Whether to use a binary segmentation target. 151 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 152 153 Returns: 154 The DataLoader. 155 """ 156 ds_kwargs, loader_kwargs = util.split_kwargs( 157 torch_em.default_segmentation_dataset, **kwargs 158 ) 159 dataset = get_plantseg_dataset( 160 path, name, split, patch_shape, 161 download=download, offsets=offsets, boundaries=boundaries, binary=binary, 162 **ds_kwargs 163 ) 164 loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs) 165 return loader
Get the PlantSeg dataloader for segmenting nuclei or cells.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- name: The name of the data to load. Either 'root', 'nuclei' or 'ovules'.
- split: The split to download. Either 'train', 'val' or 'test'.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.