torch_em.data.datasets.electron_microscopy.fafb_nuclei
This dataset contains crops of EM data with annotated nuclei from the adult fruit fly brain.
The data is extracted from https://doi.org/10.1101/2021.11.04.467197, which contains a segmentation of all nuclei in the fruit fly brain. Please cite it if you use this dataset for a publication.
"""This dataset contains crops of EM data with annotated nuclei from the adult fruit fly brain.

The data is extracted from https://doi.org/10.1101/2021.11.04.467197, which contains a segmentation
of all nuclei in the fruit fly brain. Please cite it if you use this dataset for a publication.
"""

import os
from glob import glob
from typing import Tuple, Union, Literal, List

import torch_em

from torch.utils.data import Dataset, DataLoader

from .. import util


URL = "https://owncloud.gwdg.de/index.php/s/PDrkO02w7FWLrsh/download"
CHECKSUM = "7f245866fa8ffb473f7bda1fbbea23d265183e4ada5e34a36ecd60475809dca3"


def get_fafb_nuclei_data(path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool) -> str:
    """Download the FAFB Nucleus data.

    The download is skipped when the folder for the requested split already exists in `path`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use. One of 'train', 'val', 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the folder of the requested split.

    Raises:
        AssertionError: If `split` is not one of 'train', 'val', 'test'.
    """
    assert split in ("train", "val", "test"), \
        f"Invalid split '{split}'; choose one of 'train', 'val', 'test'."
    split_folder = os.path.join(path, split)
    if not os.path.exists(split_folder):
        os.makedirs(path, exist_ok=True)
        zip_path = os.path.join(path, "fafb_nucleus_data.zip")
        util.download_source(zip_path, URL, download, CHECKSUM)
        # NOTE(review): `remove=True` presumably deletes the archive after extraction - confirm in util.
        util.unzip(zip_path, path, remove=True)
    return split_folder


def get_fafb_nuclei_paths(
    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool
) -> List[str]:
    """Get paths to the FAFB Nucleus data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use. One of 'train', 'val', 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The sorted filepaths to the '.h5' files stored in the split folder.
    """
    # The helper already returns the split folder, so it is not recomputed here.
    split_folder = get_fafb_nuclei_data(path, split, download)
    return sorted(glob(os.path.join(split_folder, "*.h5")))


def get_fafb_nuclei_dataset(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"],
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the FAFB nucleus dataset for the segmentation of nuclei in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split for the dataset, either 'train', 'val', or 'test'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    paths = get_fafb_nuclei_paths(path, split, download)
    # Raw data and labels are read from the same files, under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=paths,
        raw_key="raw",
        label_paths=paths,
        label_key="labels/nuclei",
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )


def get_fafb_nuclei_loader(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the FAFB nucleus dataloader for the segmentation of nuclei in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split for the dataset, either 'train', 'val', or 'test'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_fafb_nuclei_dataset(path, split, patch_shape, download, **ds_kwargs)
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
URL =
'https://owncloud.gwdg.de/index.php/s/PDrkO02w7FWLrsh/download'
CHECKSUM =
'7f245866fa8ffb473f7bda1fbbea23d265183e4ada5e34a36ecd60475809dca3'
def
get_fafb_nuclei_data( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool) -> str:
def get_fafb_nuclei_data(path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool) -> str:
    """Download the FAFB Nucleus data.

    The download is skipped when the folder for the requested split already exists in `path`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use. One of 'train', 'val', 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the folder of the requested split.

    Raises:
        AssertionError: If `split` is not one of 'train', 'val', 'test'.
    """
    assert split in ("train", "val", "test"), \
        f"Invalid split '{split}'; choose one of 'train', 'val', 'test'."
    split_folder = os.path.join(path, split)
    if not os.path.exists(split_folder):
        os.makedirs(path, exist_ok=True)
        zip_path = os.path.join(path, "fafb_nucleus_data.zip")
        util.download_source(zip_path, URL, download, CHECKSUM)
        # NOTE(review): `remove=True` presumably deletes the archive after extraction - confirm in util.
        util.unzip(zip_path, path, remove=True)
    return split_folder
Download the FAFB Nucleus data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split to use. One of 'train', 'val', 'test'.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the downloaded data.
def
get_fafb_nuclei_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], download: bool) -> List[str]:
def get_fafb_nuclei_paths(
    path: Union[os.PathLike, str], split: Literal["train", "val", "test"], download: bool
) -> List[str]:
    """Get paths to the FAFB Nucleus data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use. One of 'train', 'val', 'test'.
        download: Whether to download the data if it is not present.

    Returns:
        The sorted filepaths to the '.h5' files stored in the split folder.
    """
    # The helper already returns the split folder, so it is not recomputed here.
    split_folder = get_fafb_nuclei_data(path, split, download)
    return sorted(glob(os.path.join(split_folder, "*.h5")))
Get paths to the FAFB Nucleus data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split to use. One of 'train', 'val', 'test'.
- download: Whether to download the data if it is not present.
Returns:
The filepaths to the stored data.
def
get_fafb_nuclei_dataset( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], patch_shape: Tuple[int, int, int], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_fafb_nuclei_dataset(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"],
    patch_shape: Tuple[int, int, int],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the FAFB nucleus dataset for the segmentation of nuclei in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split for the dataset, either 'train', 'val', or 'test'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    paths = get_fafb_nuclei_paths(path, split, download)
    # Raw data and labels are read from the same files, under different keys.
    return torch_em.default_segmentation_dataset(
        raw_paths=paths,
        raw_key="raw",
        label_paths=paths,
        label_key="labels/nuclei",
        patch_shape=patch_shape,
        is_seg_dataset=True,
        **kwargs
    )
Get the FAFB nucleus dataset for the segmentation of nuclei in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split for the dataset, either 'train', 'val', or 'test'.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset.
Returns:
The segmentation dataset.
def
get_fafb_nuclei_loader( path: Union[os.PathLike, str], split: Literal['train', 'val', 'test'], patch_shape: Tuple[int, int, int], batch_size: int, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_fafb_nuclei_loader(
    path: Union[os.PathLike, str],
    split: Literal["train", "val", "test"],
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the FAFB nucleus dataloader for the segmentation of nuclei in EM.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split for the dataset, either 'train', 'val', or 'test'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_fafb_nuclei_dataset(path, split, patch_shape, download, **ds_kwargs)
    return torch_em.get_data_loader(ds, batch_size=batch_size, **loader_kwargs)
Get the FAFB nucleus dataloader for the segmentation of nuclei in EM.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split for the dataset, either 'train', 'val', or 'test'.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
The DataLoader.