torch_em.data.datasets.electron_microscopy.aimseg
AimSeg contains annotations for semantic segmentation of myelins, axons and inner cytoplasmic tongue, and for instance segmentation of myelinated axons in TEM images of corpus callosum (CC) in adult mice.
This dataset is available at https://zenodo.org/records/8351731. The dataset is from the publication https://doi.org/10.1371/journal.pcbi.1010845. Please cite them if you use this dataset in your research.
"""AimSeg contains annotations for semantic segmentation of myelins, axons and inner cytoplasmic tongue,
and for instance segmentation of myelinated axons in TEM images of corpus callosum (CC) in adult mice.

This dataset is available at https://zenodo.org/records/8351731.
The dataset is from the publication https://doi.org/10.1371/journal.pcbi.1010845.
Please cite them if you use this dataset in your research.
"""

import os
from glob import glob
from pathlib import Path
from natsort import natsorted
from typing import Tuple, List, Union, Optional, Literal

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URLS = [
    "https://zenodo.org/records/8351731/files/Control_Dataset.rar",
    "https://zenodo.org/records/8351731/files/Validation_Dataset_v1.rar",
]

CHECKSUMS = [
    "ecd569a5f91166a09d93d29a10e2ddd2eaa3e82df531785b7aa243e426467673",
    "647216eb09a644be8980224a52d8168fa2fa5a1fd0537fb1e5d6102ec30e396d"
]


def get_aimseg_data(path: Union[os.PathLike, str], download: bool = False):
    """Get the AimSeg data.

    For every archive in `URLS`, the archive is handed to `util.download_source` and
    `util.unzip_rarfile`, unless a folder named after the archive (without extension)
    already exists in `path`.

    Args:
        path: Path to a folder where the data is downloaded.
        download: Whether to download the data if it is not present.
    """
    for url, checksum in zip(URLS, CHECKSUMS):
        rarfname = url.rsplit("/")[-1]
        dirname = Path(rarfname).stem

        # Skip archives whose extracted folder is already present.
        if os.path.exists(os.path.join(path, dirname)):
            continue

        os.makedirs(path, exist_ok=True)

        rar_path = os.path.join(path, rarfname)
        util.download_source(path=rar_path, url=url, download=download, checksum=checksum)
        util.unzip_rarfile(rar_path=rar_path, dst=path)


def get_aimseg_paths(
    path: Union[os.PathLike, str],
    split: Optional[Literal["control", "validation"]] = None,
    targets: Literal["instances", "semantic"] = "instances",
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the AimSeg data.

    Args:
        path: Filepath to a folder where the data is downloaded.
        split: The split of the data to be used for training.
            Either `control` focused on healthy control specimen,
            or `validation` focused on mice undergoing remyelination.
            If no split is given, the files of both splits are returned.
        targets: The choice of support labels for the task.
            Either `instances` for annotated myelinated axons or `semantic` for axons, inner tongue and myelins.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `targets` or `split` is not one of the supported choices.
        RuntimeError: If no images are found or the number of images and labels differs.
    """
    # Validate the arguments first, so that an invalid choice fails before any download is attempted.
    if targets == "instances":
        label_dirname = "GroundTruth_Instance"
    elif targets == "semantic":
        label_dirname = "GroundTruth_Semantic"
    else:
        raise ValueError(f"'{targets}' is not a valid target choice. Please choose from 'instances' / 'semantic'.")

    if split and split not in ["control", "validation"]:
        raise ValueError(f"'{split}' is not a valid split choice. Please choose from 'control' / 'validation'.")

    # Download the AimSeg data.
    get_aimseg_data(path, download)

    # Select the dataset folders for the requested split (both folders if no split is given).
    dataset_dirs = []
    if split != "validation":
        dataset_dirs.append("Control_Dataset")
    if split != "control":
        dataset_dirs.append("Validation_Dataset_v1")

    # Images and labels are sorted the same way; they are expected to correspond by index.
    raw_paths, label_paths = [], []
    for dataset_dir in dataset_dirs:
        raw_paths.extend(natsorted(glob(os.path.join(path, dataset_dir, "Images", "*.tif"))))
        label_paths.extend(natsorted(glob(os.path.join(path, dataset_dir, label_dirname, "*.tif"))))

    # Explicit checks instead of 'assert', which is removed when running with 'python -O'.
    if not raw_paths:
        raise RuntimeError(f"Could not find any AimSeg images in '{path}'.")
    if len(raw_paths) != len(label_paths):
        raise RuntimeError(
            f"The number of images ({len(raw_paths)}) and labels ({len(label_paths)}) in '{path}' does not match."
        )

    return raw_paths, label_paths


def get_aimseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    targets: Literal["instances", "semantic"] = "instances",
    download: bool = False,
    split: Optional[Literal["control", "validation"]] = None,
    **kwargs
) -> Dataset:
    """Get the AimSeg dataset for axon and myelin segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded.
        patch_shape: The patch shape to use for training.
        targets: The choice of support labels for the task.
            Either `instances` for annotated myelinated axons or `semantic` for axons, inner tongue and myelins.
        download: Whether to download the data if it is not present.
        split: The split of the data to use, either `control` or `validation`.
            By default (None) the data of both splits is used.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_aimseg_paths(path, split, targets, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )


def get_aimseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    targets: Literal["instances", "semantic"] = "instances",
    download: bool = False,
    split: Optional[Literal["control", "validation"]] = None,
    **kwargs
) -> DataLoader:
    """Get the AimSeg dataloader for axon and myelin segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        targets: The choice of support labels for the task.
            Either 'instances' for annotated myelinated axons or 'semantic' for axons, inner tongue and myelins.
        download: Whether to download the data if it is not present.
        split: The split of the data to use, either `control` or `validation`.
            By default (None) the data of both splits is used.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Keyword arguments accepted by the dataset factory go to the dataset, the rest to the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_aimseg_dataset(path, patch_shape, targets, download, split=split, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URLS =
['https://zenodo.org/records/8351731/files/Control_Dataset.rar', 'https://zenodo.org/records/8351731/files/Validation_Dataset_v1.rar']
CHECKSUMS =
['ecd569a5f91166a09d93d29a10e2ddd2eaa3e82df531785b7aa243e426467673', '647216eb09a644be8980224a52d8168fa2fa5a1fd0537fb1e5d6102ec30e396d']
def
get_aimseg_data(path: Union[os.PathLike, str], download: bool = False):
def get_aimseg_data(path: Union[os.PathLike, str], download: bool = False):
    """Fetch and unpack the AimSeg archives into a folder.

    Every archive in `URLS` is passed to `util.download_source` and `util.unzip_rarfile`,
    unless a folder named after the archive (without its extension) already exists in `path`.

    Args:
        path: Path to a folder where the data is downloaded.
        download: Whether to download the data if it is not present.
    """
    for source_url, expected_checksum in zip(URLS, CHECKSUMS):
        archive_name = source_url.rsplit("/")[-1]
        extracted_dir = os.path.join(path, Path(archive_name).stem)

        # Only archives without an already extracted folder need to be processed.
        if not os.path.exists(extracted_dir):
            os.makedirs(path, exist_ok=True)

            archive_path = os.path.join(path, archive_name)
            util.download_source(path=archive_path, url=source_url, download=download, checksum=expected_checksum)
            util.unzip_rarfile(rar_path=archive_path, dst=path)
Get the AimSeg data.
Arguments:
- path: Path to a folder where the data is downloaded.
- download: Whether to download the data if it is not present.
def
get_aimseg_paths( path: Union[os.PathLike, str], split: Optional[Literal['control', 'validation']] = None, targets: Literal['instances', 'semantic'] = 'instances', download: bool = False) -> Tuple[List[str], List[str]]:
def get_aimseg_paths(
    path: Union[os.PathLike, str],
    split: Optional[Literal["control", "validation"]] = None,
    targets: Literal["instances", "semantic"] = "instances",
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the AimSeg data.

    Args:
        path: Filepath to a folder where the data is downloaded.
        split: The split of the data to be used for training.
            Either `control` focused on healthy control specimen,
            or `validation` focused on mice undergoing remyelination.
            If no split is given, the files of both splits are returned.
        targets: The choice of support labels for the task.
            Either `instances` for annotated myelinated axons or `semantic` for axons, inner tongue and myelins.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `targets` or `split` is not one of the supported choices.
        RuntimeError: If no images are found or the number of images and labels differs.
    """
    # Validate the arguments first, so that an invalid choice fails before any download is attempted.
    if targets == "instances":
        label_dirname = "GroundTruth_Instance"
    elif targets == "semantic":
        label_dirname = "GroundTruth_Semantic"
    else:
        raise ValueError(f"'{targets}' is not a valid target choice. Please choose from 'instances' / 'semantic'.")

    if split and split not in ["control", "validation"]:
        raise ValueError(f"'{split}' is not a valid split choice. Please choose from 'control' / 'validation'.")

    # Download the AimSeg data.
    get_aimseg_data(path, download)

    # Select the dataset folders for the requested split (both folders if no split is given).
    dataset_dirs = []
    if split != "validation":
        dataset_dirs.append("Control_Dataset")
    if split != "control":
        dataset_dirs.append("Validation_Dataset_v1")

    # Images and labels are sorted the same way; they are expected to correspond by index.
    raw_paths, label_paths = [], []
    for dataset_dir in dataset_dirs:
        raw_paths.extend(natsorted(glob(os.path.join(path, dataset_dir, "Images", "*.tif"))))
        label_paths.extend(natsorted(glob(os.path.join(path, dataset_dir, label_dirname, "*.tif"))))

    # Explicit checks instead of 'assert', which is removed when running with 'python -O'.
    if not raw_paths:
        raise RuntimeError(f"Could not find any AimSeg images in '{path}'.")
    if len(raw_paths) != len(label_paths):
        raise RuntimeError(
            f"The number of images ({len(raw_paths)}) and labels ({len(label_paths)}) in '{path}' does not match."
        )

    return raw_paths, label_paths
Get paths to the AimSeg data.
Arguments:
- path: Filepath to a folder where the data is downloaded.
- split: The split of the data to be used for training. Either `control` focused on healthy control specimen, or `validation` focused on mice undergoing remyelination.
- targets: The choice of support labels for the task. Either `instances` for annotated myelinated axons or `semantic` for axons, inner tongue and myelins.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_aimseg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], targets: Literal['instances', 'semantic'] = 'instances', download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_aimseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    targets: Literal["instances", "semantic"] = "instances",
    download: bool = False,
    split: Optional[Literal["control", "validation"]] = None,
    **kwargs
) -> Dataset:
    """Get the AimSeg dataset for axon and myelin segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded.
        patch_shape: The patch shape to use for training.
        targets: The choice of support labels for the task.
            Either `instances` for annotated myelinated axons or `semantic` for axons, inner tongue and myelins.
        download: Whether to download the data if it is not present.
        split: The split of the data to use, either `control` or `validation`.
            By default (None) the data of both splits is used, which matches the previous behavior.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # The split is forwarded instead of being fixed to None, so a single split can be selected.
    raw_paths, label_paths = get_aimseg_paths(path, split, targets, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )
Get the AimSeg dataset for axon and myelin segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded.
- patch_shape: The patch shape to use for training.
- targets: The choice of support labels for the task. Either `instances` for annotated myelinated axons or `semantic` for axons, inner tongue and myelins.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_aimseg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], targets: Literal['instances', 'semantic'] = 'instances', download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_aimseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    targets: Literal["instances", "semantic"] = "instances",
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the AimSeg dataloader for axon and myelin segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        targets: The choice of support labels for the task.
            Either 'instances' for annotated myelinated axons or 'semantic' for axons, inner tongue and myelins.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Keyword arguments are divided via `util.split_kwargs`, using the dataset factory as reference.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    aimseg_dataset = get_aimseg_dataset(
        path=path, patch_shape=patch_shape, targets=targets, download=download, **dataset_kwargs
    )

    return torch_em.get_data_loader(aimseg_dataset, batch_size, **dataloader_kwargs)
Get the AimSeg dataloader for axon and myelin segmentation.
Arguments:
- path: Filepath to a folder where the data is downloaded.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- targets: The choice of support labels for the task. Either 'instances' for annotated myelinated axons or 'semantic' for axons, inner tongue and myelins.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader