torch_em.data.datasets.electron_microscopy.lucchi
The Lucchi dataset is a benchmark dataset for mitochondrion segmentation in electron microscopy.
The dataset was published in https://doi.org/10.48550/arXiv.1812.06024. Please cite this publication if you use the dataset in your research. We use the version of the dataset from https://sites.google.com/view/connectomics/.
1"""The Lucchi dataset is a segmentation dataset for mitochondrion segmentation in electron microscopy. 2 3The dataset was published in https://doi.org/10.48550/arXiv.1812.06024. 4Please cite this publication if you use the dataset in your research. 5We use the version of the dataset from https://sites.google.com/view/connectomics/. 6""" 7 8import os 9from glob import glob 10from tqdm import tqdm 11from shutil import rmtree 12from concurrent import futures 13from typing import Tuple, Union, Literal 14 15import imageio 16import numpy as np 17 18import torch_em 19 20from torch.utils.data import Dataset, DataLoader 21 22from .. import util 23 24 25URL = "http://www.casser.io/files/lucchi_pp.zip" 26CHECKSUM = "770ce9e98fc6f29c1b1a250c637e6c5125f2b5f1260e5a7687b55a79e2e8844d" 27 28 29def _load_volume(path, pattern): 30 nz = len(glob(os.path.join(path, "*.png"))) 31 im0 = imageio.imread(os.path.join(path, pattern % 0)) 32 out = np.zeros((nz,) + im0.shape, dtype=im0.dtype) 33 out[0] = im0 34 35 def _loadz(z): 36 im = imageio.imread(os.path.join(path, pattern % z)) 37 out[z] = im 38 39 n_threads = 8 40 with futures.ThreadPoolExecutor(n_threads) as tp: 41 list(tqdm( 42 tp.map(_loadz, range(1, nz)), desc="Load volume", total=nz-1 43 )) 44 45 return out 46 47 48def _create_data(root, inputs, out_path): 49 import h5py 50 51 raw = _load_volume(os.path.join(root, inputs[0]), pattern="mask%04i.png") 52 labels_argb = _load_volume(os.path.join(root, inputs[1]), pattern="%i.png") 53 if labels_argb.ndim == 4: 54 labels = np.zeros(raw.shape, dtype="uint8") 55 fg_mask = (labels_argb == np.array([255, 255, 255, 255])[None, None, None]).all(axis=-1) 56 labels[fg_mask] = 1 57 else: 58 assert labels_argb.ndim == 3 59 labels = labels_argb 60 labels[labels == 255] = 1 61 assert (np.unique(labels) == np.array([0, 1])).all() 62 assert raw.shape == labels.shape, f"{raw.shape}, {labels.shape}" 63 with h5py.File(out_path, "w") as f: 64 f.create_dataset("raw", data=raw, compression="gzip") 65 f.create_dataset("labels", data=labels.astype("uint8"), compression="gzip") 66 67 68def get_lucchi_data(path: Union[os.PathLike, str], split: Literal["train", "test"], download: bool = False) -> str: 69 """Download the Lucchi dataset. 70 71 Args: 72 path: Filepath to a folder where the downloaded data will be saved. 73 split: The split to download, either 'train' or 'test'. 74 download: Whether to download the data if it is not present. 75 76 Returns: 77 The filepath for the downloaded data. 78 """ 79 data_path = os.path.join(path, f"lucchi_{split}.h5") 80 if os.path.exists(data_path): 81 return data_path 82 83 os.makedirs(path) 84 tmp_path = os.path.join(path, "lucchi.zip") 85 util.download_source(tmp_path, URL, download, checksum=CHECKSUM) 86 util.unzip(tmp_path, path, remove=True) 87 88 root = os.path.join(path, "Lucchi++") 89 assert os.path.exists(root), root 90 91 inputs = [["Test_In", "Test_Out"], ["Train_In", "Train_Out"]] 92 outputs = ["lucchi_train.h5", "lucchi_test.h5"] 93 for inp, out in zip(inputs, outputs): 94 out_path = os.path.join(path, out) 95 _create_data(root, inp, out_path) 96 rmtree(root) 97 98 assert os.path.exists(data_path), data_path 99 return data_path 100 101 102def get_lucchi_paths(path: Union[os.PathLike, str], split: Literal["train", "test"], download: bool = False) -> str: 103 """Get paths to the Lucchi data. 104 105 Args: 106 path: Filepath to a folder where the downloaded data will be saved. 107 split: The data split. Either 'train' or 'test'. 
108 download: Whether to download the data if it is not present. 109 110 Returns: 111 The filepath for the stored data. 112 """ 113 get_lucchi_data(path, split, download) 114 data_path = os.path.join(path, f"lucchi_{split}.h5") 115 return data_path 116 117 118def get_lucchi_dataset( 119 path: Union[os.PathLike, str], 120 split: Literal["train", "test"], 121 patch_shape: Tuple[int, int, int], 122 download: bool = False, 123 **kwargs 124) -> Dataset: 125 """Get dataset for EM mitochondrion segmentation in the Lucchi dataset. 126 127 Args: 128 path: Filepath to a folder where the downloaded data will be saved. 129 split: The data split. Either 'train' or 'test'. 130 patch_shape: The patch shape to use for training. 131 download: Whether to download the data if it is not present. 132 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 133 134 Returns: 135 The segmentation dataset. 136 """ 137 assert split in ("train", "test") 138 139 data_path = get_lucchi_paths(path, split, download) 140 141 return torch_em.default_segmentation_dataset( 142 raw_paths=data_path, 143 raw_key="raw", 144 label_paths=data_path, 145 label_key="labels", 146 patch_shape=patch_shape, 147 **kwargs 148 ) 149 150 151def get_lucchi_loader( 152 path: Union[os.PathLike, str], 153 split: Literal["train", "test"], 154 patch_shape: Tuple[int, int, int], 155 batch_size: int, 156 download: bool = False, 157 **kwargs 158) -> DataLoader: 159 """Get dataloader for EM mitochondrion segmentation in the Lucchi dataset. 160 161 Args: 162 path: Filepath to a folder where the downloaded data will be saved. 163 split: The data split. Either 'train' or 'test'. 164 patch_shape: The patch shape to use for training. 165 batch_size: The batch size for training. 166 download: Whether to download the data if it is not present. 167 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 168 169 Returns: 170 The PyTorch DataLoader. 171 """ 172 ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs) 173 dataset = get_lucchi_dataset(path, split, patch_shape, download=download, **ds_kwargs) 174 return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL = "http://www.casser.io/files/lucchi_pp.zip"
CHECKSUM = "770ce9e98fc6f29c1b1a250c637e6c5125f2b5f1260e5a7687b55a79e2e8844d"
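The checksum lets the download helper detect truncated or corrupted archives; util.download_source performs this verification internally. As a minimal sketch (assuming the digest is SHA256, which the 64-hex-character length suggests, and that the archive has already been fetched to lucchi.zip), the check amounts to:

import hashlib

def sha256_checksum(file_path, chunk_size=1024 * 1024):
    # Stream the file in chunks so large archives need not fit in memory.
    digest = hashlib.sha256()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# "lucchi.zip" is the temporary archive name used by get_lucchi_data.
assert sha256_checksum("lucchi.zip") == CHECKSUM, "Checksum mismatch: corrupted download."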
def get_lucchi_data(path: Union[os.PathLike, str], split: Literal['train', 'test'], download: bool = False) -> str:
Download the Lucchi dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split to download, either 'train' or 'test'.
- download: Whether to download the data if it is not present.
Returns:
The filepath for the downloaded data.
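For example, downloading and converting the train split (the target folder ./data/lucchi is only illustrative) returns the path to the assembled HDF5 file:

data_path = get_lucchi_data("./data/lucchi", split="train", download=True)
print(data_path)  # ./data/lucchi/lucchi_train.h5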
def get_lucchi_paths(path: Union[os.PathLike, str], split: Literal['train', 'test'], download: bool = False) -> str:
Get paths to the Lucchi data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split. Either 'train' or 'test'.
- download: Whether to download the data if it is not present.
Returns:
The filepath for the stored data.
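As the source above shows, the stored file contains the EM volume under the key "raw" and the binary mitochondrion mask under "labels". A quick inspection with h5py (folder path again illustrative):

import h5py

data_path = get_lucchi_paths("./data/lucchi", split="test", download=True)
with h5py.File(data_path, "r") as f:
    print(f["raw"].shape, f["raw"].dtype)        # the EM image volume
    print(f["labels"].shape, f["labels"].dtype)  # uint8 mask with values {0, 1}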
def get_lucchi_dataset(path: Union[os.PathLike, str], split: Literal['train', 'test'], patch_shape: Tuple[int, int, int], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
Get dataset for EM mitochondrion segmentation in the Lucchi dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split. Either 'train' or 'test'.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
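A typical instantiation looks like the following; the patch shape is only an example, and the items are (raw, label) tensor pairs as produced by torch_em.default_segmentation_dataset:

dataset = get_lucchi_dataset("./data/lucchi", split="train", patch_shape=(32, 256, 256), download=True)
raw, labels = dataset[0]  # a patch of raw data and the corresponding label patch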
def get_lucchi_loader(path: Union[os.PathLike, str], split: Literal['train', 'test'], patch_shape: Tuple[int, int, int], batch_size: int, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
Get dataloader for EM mitochondrion segmentation in the Lucchi dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split. Either 'train' or 'test'.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The PyTorch DataLoader.
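Putting everything together, a training loader can be created in one call. Patch shape, batch size, and worker count below are illustrative; num_workers and shuffle are not dataset arguments and are therefore routed to the PyTorch DataLoader by util.split_kwargs:

loader = get_lucchi_loader(
    "./data/lucchi", split="train", patch_shape=(32, 256, 256),
    batch_size=2, download=True, num_workers=4, shuffle=True,
)
x, y = next(iter(loader))  # a batch of raw patches and matching label patches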