torch_em.data.datasets.electron_microscopy.aimseg

AimSeg contains annotations for semantic segmentation of myelins, axons and inner cytoplasmic tongue, and for instance segmentation of myelinated axons in TEM images of corpus callosum (CC) in adult mice.

This dataset is available at https://zenodo.org/records/8351731. The dataset is from the publication https://doi.org/10.1371/journal.pcbi.1010845. Please cite them if you use this dataset in your research.

  1"""AimSeg contains annotations for semantic segmentation of myelins, axons and inner cytoplasmic tongue,
  2and for instance segmentation of myelinated axons in TEM images of corpus callosum (CC) in adult mice.
  3
  4This dataset is available at https://zenodo.org/records/8351731.
  5The dataset is from the publication https://doi.org/10.1371/journal.pcbi.1010845.
  6Please cite them if you use this dataset in your research.
  7"""
  8
  9import os
 10from glob import glob
 11from pathlib import Path
 12from natsort import natsorted
 13from typing import Tuple, List, Union, Optional, Literal
 14
 15from torch.utils.data import Dataset, DataLoader
 16
 17import torch_em
 18
 19from .. import util
 20
 21
 22URLS = [
 23    "https://zenodo.org/records/8351731/files/Control_Dataset.rar",
 24    "https://zenodo.org/records/8351731/files/Validation_Dataset_v1.rar",
 25]
 26
 27CHECKSUMS = [
 28    "ecd569a5f91166a09d93d29a10e2ddd2eaa3e82df531785b7aa243e426467673",
 29    "647216eb09a644be8980224a52d8168fa2fa5a1fd0537fb1e5d6102ec30e396d"
 30]
 31
 32
 33def get_aimseg_data(path: Union[os.PathLike, str], download: bool = False):
 34    """Get the AimSeg data.
 35
 36    Args:
 37        path: Path to a folder where the data is downloaded.
 38        download: Whether to download the data if it is not present.
 39    """
 40    for url, checksum in zip(URLS, CHECKSUMS):
 41        rarfname = url.rsplit("/")[-1]
 42        dirname = Path(rarfname).stem
 43
 44        if os.path.exists(os.path.join(path, dirname)):
 45            continue
 46
 47        os.makedirs(path, exist_ok=True)
 48
 49        util.download_source(path=os.path.join(path, rarfname), url=url, download=download, checksum=checksum)
 50        util.unzip_rarfile(rar_path=os.path.join(path, rarfname), dst=path)
 51
 52
 53def get_aimseg_paths(
 54    path: Union[os.PathLike, str],
 55    split: Optional[Literal["control", "validation"]] = None,
 56    targets: Literal["instances", "semantic"] = "instances",
 57    download: bool = False,
 58) -> Tuple[List[str], List[str]]:
 59    """Get paths to the AimSeg data.
 60
 61    Args:
 62        path: Filepath to a folder where the data is downloaded.
 63        split: The split of the data to be used for training.
 64            Either `control` focused on healthy control specimen,
 65            or `validation` focused on mice undergoing remyelination.
 66        targets: The choice of support labels for the task.
 67            Either `instances` for annotated myelinated axons or `semantic` for axons, inner tongue and myelins.
 68        download: Whether to download the data if it is not present.
 69
 70    Returns:
 71        List of filepaths for the image data.
 72        List of filepaths for the label data.
 73    """
 74    # Download the AimSeg data.
 75    get_aimseg_data(path, download)
 76
 77    # Get the directory name for desired targets.
 78    if targets == "instances":
 79        dirname = "GroundTruth_Instance"
 80    elif targets == "semantic":
 81        dirname = "GroundTruth_Semantic"
 82    else:
 83        raise ValueError(f"'{targets}' is not a valid target choice. Please choose from 'instances' / 'semantic'.")
 84
 85    # Get the paths to image and corresponding labels
 86    raw_paths, label_paths = [], []
 87    if split and split not in ["control", "validation"]:
 88        raise ValueError(f"'{split}' is not a valid split choice. Please choose from 'control' / 'validation'.")
 89
 90    if split != "validation":
 91        raw_paths.extend(natsorted(glob(os.path.join(path, "Control_Dataset", "Images", "*.tif"))))
 92        label_paths.extend(natsorted(glob(os.path.join(path, "Control_Dataset", dirname, "*.tif"))))
 93
 94    if split != "control":
 95        raw_paths.extend(natsorted(glob(os.path.join(path, "Validation_Dataset_v1", "Images", "*.tif"))))
 96        label_paths.extend(natsorted(glob(os.path.join(path, "Validation_Dataset_v1", dirname, "*.tif"))))
 97
 98    assert raw_paths and len(raw_paths) == len(label_paths)
 99
100    return raw_paths, label_paths
101
102
def get_aimseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    targets: Literal["instances", "semantic"] = "instances",
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the AimSeg dataset for axon and myelin segmentation.

    The image and label files of both the control and the validation data are used.

    Args:
        path: Filepath to a folder where the data is downloaded.
        patch_shape: The patch shape to use for training.
        targets: The choice of support labels for the task.
            Either `instances` for annotated myelinated axons or `semantic` for axons, inner tongue and myelins.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # No split is selected here, so the paths cover all available data.
    image_files, annotation_files = get_aimseg_paths(path=path, split=None, targets=targets, download=download)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_files, raw_key=None,
        label_paths=annotation_files, label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
133
134
def get_aimseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    targets: Literal["instances", "semantic"] = "instances",
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the AimSeg dataloader for axon and myelin segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        targets: The choice of support labels for the task.
            Either 'instances' for annotated myelinated axons or 'semantic' for axons, inner tongue and myelins.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    return torch_em.get_data_loader(
        get_aimseg_dataset(path, patch_shape, targets, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs
    )
# Download URLs of the two AimSeg archives hosted on Zenodo.
URLS = ['https://zenodo.org/records/8351731/files/Control_Dataset.rar', 'https://zenodo.org/records/8351731/files/Validation_Dataset_v1.rar']
# Expected checksums of the archives, in the same order as URLS (they are paired via zip in get_aimseg_data).
CHECKSUMS = ['ecd569a5f91166a09d93d29a10e2ddd2eaa3e82df531785b7aa243e426467673', '647216eb09a644be8980224a52d8168fa2fa5a1fd0537fb1e5d6102ec30e396d']
def get_aimseg_data(path: Union[os.PathLike, str], download: bool = False):
def get_aimseg_data(path: Union[os.PathLike, str], download: bool = False):
    """Fetch and extract every AimSeg archive that is not yet available locally.

    Args:
        path: Path to a folder where the data is downloaded.
        download: Whether to download the data if it is not present.
    """
    for archive_url, archive_checksum in zip(URLS, CHECKSUMS):
        archive_name = archive_url.split("/")[-1]
        extracted_dir = os.path.join(path, Path(archive_name).stem)

        # A folder named after the archive (without extension) marks it as already extracted.
        if not os.path.exists(extracted_dir):
            os.makedirs(path, exist_ok=True)

            archive_path = os.path.join(path, archive_name)
            util.download_source(
                path=archive_path, url=archive_url, download=download, checksum=archive_checksum
            )
            util.unzip_rarfile(rar_path=archive_path, dst=path)

Get the AimSeg data.

Arguments:
  • path: Path to a folder where the data is downloaded.
  • download: Whether to download the data if it is not present.
def get_aimseg_paths( path: Union[os.PathLike, str], split: Optional[Literal['control', 'validation']] = None, targets: Literal['instances', 'semantic'] = 'instances', download: bool = False) -> Tuple[List[str], List[str]]:
 54def get_aimseg_paths(
 55    path: Union[os.PathLike, str],
 56    split: Optional[Literal["control", "validation"]] = None,
 57    targets: Literal["instances", "semantic"] = "instances",
 58    download: bool = False,
 59) -> Tuple[List[str], List[str]]:
 60    """Get paths to the AimSeg data.
 61
 62    Args:
 63        path: Filepath to a folder where the data is downloaded.
 64        split: The split of the data to be used for training.
 65            Either `control` focused on healthy control specimen,
 66            or `validation` focused on mice undergoing remyelination.
 67        targets: The choice of support labels for the task.
 68            Either `instances` for annotated myelinated axons or `semantic` for axons, inner tongue and myelins.
 69        download: Whether to download the data if it is not present.
 70
 71    Returns:
 72        List of filepaths for the image data.
 73        List of filepaths for the label data.
 74    """
 75    # Download the AimSeg data.
 76    get_aimseg_data(path, download)
 77
 78    # Get the directory name for desired targets.
 79    if targets == "instances":
 80        dirname = "GroundTruth_Instance"
 81    elif targets == "semantic":
 82        dirname = "GroundTruth_Semantic"
 83    else:
 84        raise ValueError(f"'{targets}' is not a valid target choice. Please choose from 'instances' / 'semantic'.")
 85
 86    # Get the paths to image and corresponding labels
 87    raw_paths, label_paths = [], []
 88    if split and split not in ["control", "validation"]:
 89        raise ValueError(f"'{split}' is not a valid split choice. Please choose from 'control' / 'validation'.")
 90
 91    if split != "validation":
 92        raw_paths.extend(natsorted(glob(os.path.join(path, "Control_Dataset", "Images", "*.tif"))))
 93        label_paths.extend(natsorted(glob(os.path.join(path, "Control_Dataset", dirname, "*.tif"))))
 94
 95    if split != "control":
 96        raw_paths.extend(natsorted(glob(os.path.join(path, "Validation_Dataset_v1", "Images", "*.tif"))))
 97        label_paths.extend(natsorted(glob(os.path.join(path, "Validation_Dataset_v1", dirname, "*.tif"))))
 98
 99    assert raw_paths and len(raw_paths) == len(label_paths)
100
101    return raw_paths, label_paths

Get paths to the AimSeg data.

Arguments:
  • path: Filepath to a folder where the data is downloaded.
  • split: The split of the data to be used for training. Either control focused on healthy control specimen, or validation focused on mice undergoing remyelination.
  • targets: The choice of support labels for the task. Either instances for annotated myelinated axons or semantic for axons, inner tongue and myelins.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_aimseg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], targets: Literal['instances', 'semantic'] = 'instances', download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_aimseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    targets: Literal["instances", "semantic"] = "instances",
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the AimSeg dataset for axon and myelin segmentation.

    The image and label files of both the control and the validation data are used.

    Args:
        path: Filepath to a folder where the data is downloaded.
        patch_shape: The patch shape to use for training.
        targets: The choice of support labels for the task.
            Either `instances` for annotated myelinated axons or `semantic` for axons, inner tongue and myelins.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # No split is selected here, so the paths cover all available data.
    image_files, annotation_files = get_aimseg_paths(path=path, split=None, targets=targets, download=download)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_files, raw_key=None,
        label_paths=annotation_files, label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset

Get the AimSeg dataset for axon and myelin segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded.
  • patch_shape: The patch shape to use for training.
  • targets: The choice of support labels for the task. Either instances for annotated myelinated axons or semantic for axons, inner tongue and myelins.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_aimseg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], targets: Literal['instances', 'semantic'] = 'instances', download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_aimseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    targets: Literal["instances", "semantic"] = "instances",
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the AimSeg dataloader for axon and myelin segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        targets: The choice of support labels for the task.
            Either 'instances' for annotated myelinated axons or 'semantic' for axons, inner tongue and myelins.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    return torch_em.get_data_loader(
        get_aimseg_dataset(path, patch_shape, targets, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs
    )

Get the AimSeg dataloader for axon and myelin segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • targets: The choice of support labels for the task. Either 'instances' for annotated myelinated axons or 'semantic' for axons, inner tongue and myelins.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader