torch_em.data.datasets.light_microscopy.phmamm
The PhMamm dataset contains 3D light-sheet microscopy volumes of Phallusia mammillata embryos with cell membrane segmentations.
The dataset is located at https://figshare.com/articles/dataset/3D_Mask_R-CNN_data/26973085. The original data is from the publication https://doi.org/10.1126/science.aar5663. Please cite it if you use this dataset in your research.
"""The PhMamm dataset contains 3D light-sheet microscopy volumes of Phallusia mammillata
embryos with cell membrane segmentations.

The dataset is located at https://figshare.com/articles/dataset/3D_Mask_R-CNN_data/26973085.
The original data is from the publication https://doi.org/10.1126/science.aar5663.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, Optional, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


# Download links of the two archives: raw volumes and ground-truth segmentations.
URLS = {
    "inputs": "https://ndownloader.figshare.com/files/51130115",
    "ground_truth": "https://ndownloader.figshare.com/files/51130100",
}
# No checksums are recorded for the archives; `None` is handed to `util.download_source` as is.
CHECKSUMS = {
    "inputs": None,
    "ground_truth": None,
}


def get_phmamm_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the PhMamm dataset.

    The raw volumes and the ground-truth segmentations come as two separate archives.
    An archive is only fetched when its extracted folder is missing, so a run that was
    interrupted after the first archive is completed by the next call instead of being
    mistaken for a finished download.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the dataset is stored.
    """
    data_dir = os.path.join(path, "data")

    # (archive name, key into URLS / CHECKSUMS). The archive name without ".zip" is the
    # folder that `get_phmamm_paths` reads from inside `data_dir`.
    archives = (("Inputs", "inputs"), ("ASTEC_Ground_truth", "ground_truth"))
    missing = [
        (name, key) for name, key in archives
        if not os.path.isdir(os.path.join(data_dir, name))
    ]
    if not missing:
        return data_dir

    os.makedirs(path, exist_ok=True)

    for name, key in missing:
        zip_path = os.path.join(path, f"{name}.zip")
        util.download_source(zip_path, URLS[key], download, checksum=CHECKSUMS[key])
        util.unzip(zip_path, data_dir, remove=True)

    return data_dir


def get_phmamm_paths(
    path: Union[os.PathLike, str], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the PhMamm data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If no image volumes are found or if the number of image and
            label volumes differs.
    """
    data_dir = get_phmamm_data(path, download)

    raw_dir = os.path.join(data_dir, "Inputs")
    label_dir = os.path.join(data_dir, "ASTEC_Ground_truth")
    raw_paths = natsorted(glob(os.path.join(raw_dir, "*.tiff")))
    label_paths = natsorted(glob(os.path.join(label_dir, "*.tiff")))

    # Raised explicitly instead of via `assert`, so that the check also runs under `python -O`.
    if not raw_paths:
        raise AssertionError(f"No image volumes (*.tiff) were found in '{raw_dir}'.")
    if len(raw_paths) != len(label_paths):
        raise AssertionError(
            f"Found {len(raw_paths)} image volumes in '{raw_dir}' "
            f"but {len(label_paths)} label volumes in '{label_dir}'."
        )

    return raw_paths, label_paths


def get_phmamm_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the PhMamm dataset for cell segmentation in light-sheet microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_phmamm_paths(path, download)

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, offsets=offsets, boundaries=boundaries, binary=binary
    )

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        patch_shape=patch_shape,
        **kwargs
    )


def get_phmamm_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the PhMamm dataloader for cell segmentation in light-sheet microscopy.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from the ones meant for the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_phmamm_dataset(
        path=path,
        patch_shape=patch_shape,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        download=download,
        **ds_kwargs,
    )
    return torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
URLS = {'inputs': 'https://ndownloader.figshare.com/files/51130115', 'ground_truth': 'https://ndownloader.figshare.com/files/51130100'}
CHECKSUMS = {'inputs': None, 'ground_truth': None}
def get_phmamm_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_phmamm_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the PhMamm dataset.

    The raw volumes and the ground-truth segmentations come as two separate archives.
    An archive is only fetched when its extracted folder is missing, so a run that was
    interrupted after the first archive is completed by the next call instead of being
    mistaken for a finished download.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the dataset is stored.
    """
    data_dir = os.path.join(path, "data")

    # (archive name, key into URLS / CHECKSUMS). The archive name without ".zip" is the
    # folder that `get_phmamm_paths` reads from inside `data_dir`.
    archives = (("Inputs", "inputs"), ("ASTEC_Ground_truth", "ground_truth"))
    missing = [
        (name, key) for name, key in archives
        if not os.path.isdir(os.path.join(data_dir, name))
    ]
    if not missing:
        return data_dir

    os.makedirs(path, exist_ok=True)

    for name, key in missing:
        zip_path = os.path.join(path, f"{name}.zip")
        util.download_source(zip_path, URLS[key], download, checksum=CHECKSUMS[key])
        util.unzip(zip_path, data_dir, remove=True)

    return data_dir
Download the PhMamm dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the dataset is stored.
def get_phmamm_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
def get_phmamm_paths(
    path: Union[os.PathLike, str], download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the PhMamm data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If no image volumes are found or if the number of image and
            label volumes differs.
    """
    data_dir = get_phmamm_data(path, download)

    raw_dir = os.path.join(data_dir, "Inputs")
    label_dir = os.path.join(data_dir, "ASTEC_Ground_truth")
    raw_paths = natsorted(glob(os.path.join(raw_dir, "*.tiff")))
    label_paths = natsorted(glob(os.path.join(label_dir, "*.tiff")))

    # Raised explicitly instead of via `assert`, so that the check also runs under `python -O`.
    if not raw_paths:
        raise AssertionError(f"No image volumes (*.tiff) were found in '{raw_dir}'.")
    if len(raw_paths) != len(label_paths):
        raise AssertionError(
            f"Found {len(raw_paths)} image volumes in '{raw_dir}' "
            f"but {len(label_paths)} label volumes in '{label_dir}'."
        )

    return raw_paths, label_paths
Get paths to the PhMamm data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_phmamm_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, int, int], offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_phmamm_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the PhMamm segmentation dataset for cells in light-sheet microscopy volumes.

    Args:
        path: Folder in which the data is stored, or to which it is downloaded.
        patch_shape: Shape of the patches used for training.
        offsets: Offsets for computing affinities as the target.
        boundaries: Whether boundaries are computed as the target.
        binary: Whether a binary segmentation is used as the target.
        download: Whether the data is downloaded when it is not present yet.
        kwargs: Further keyword arguments, forwarded to `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_files, label_files = get_phmamm_paths(path, download)

    # Let the shared helper update the dataset arguments for the requested target.
    # Its second return value is not needed here.
    target_options = dict(offsets=offsets, boundaries=boundaries, binary=binary)
    kwargs, _ = util.add_instance_label_transform(kwargs, add_binary_target=True, **target_options)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_files, raw_key=None,
        label_paths=label_files, label_key=None,
        patch_shape=patch_shape, **kwargs
    )
    return dataset
Get the PhMamm dataset for cell segmentation in light-sheet microscopy.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_phmamm_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int, int], offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_phmamm_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int, int],
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the PhMamm dataloader for cell segmentation in light-sheet microscopy volumes.

    Args:
        path: Folder in which the data is stored, or to which it is downloaded.
        batch_size: Batch size used for training.
        patch_shape: Shape of the patches used for training.
        offsets: Offsets for computing affinities as the target.
        boundaries: Whether boundaries are computed as the target.
        binary: Whether a binary segmentation is used as the target.
        download: Whether the data is downloaded when it is not present yet.
        kwargs: Further keyword arguments, either for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Split the extra arguments into those for the dataset and those for the DataLoader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )

    target_options = dict(offsets=offsets, boundaries=boundaries, binary=binary)
    phmamm_dataset = get_phmamm_dataset(
        path=path, patch_shape=patch_shape, download=download, **target_options, **dataset_kwargs
    )

    loader = torch_em.get_data_loader(
        dataset=phmamm_dataset, batch_size=batch_size, **dataloader_kwargs
    )
    return loader
Get the PhMamm dataloader for cell segmentation in light-sheet microscopy.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.