torch_em.data.datasets.light_microscopy.bitdepth_nucseg
The BitDepth NucSeg dataset contains annotations for nucleus segmentation in DAPI stained fluorescence microscopy images.
The dataset is located at https://github.com/masih4/BitDepth_NucSeg/. This dataset is from the publication https://doi.org/10.3390/diagnostics11060967. Please cite it if you use this dataset in your research.
"""The BitDepth NucSeg dataset contains annotations for nucleus segmentation
in DAPI stained fluorescence microscopy images.

The dataset is located at https://github.com/masih4/BitDepth_NucSeg/
This dataset is from the publication https://doi.org/10.3390/diagnostics11060967.
Please cite it if you use this dataset in your research.
"""

import os
import shutil
import subprocess
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, Optional, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://github.com/masih4/BitDepth_NucSeg"


def _remove_other_files(path):
    """Remove everything in `path` except the entry named "data".

    Both regular and hidden (dot-prefixed) entries are collected, so the
    `.git` folder of the cloned repository is removed as well.
    """
    all_files = glob(os.path.join(path, "*"))
    # A plain "*" pattern does not match dot-files, so collect them separately.
    all_files.extend(glob(os.path.join(path, ".*")))
    for _file in all_files:
        if os.path.basename(_file) == "data":
            continue

        if os.path.isdir(_file):
            shutil.rmtree(_file)
        else:
            os.remove(_file)


def get_bitdepth_nucseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the BitDepth NucSeg dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the downloaded data.

    Raises:
        ValueError: If the data is not present and `download` is False.
        subprocess.CalledProcessError: If cloning the repository fails.
    """
    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        return data_dir

    if not download:
        raise ValueError("The data directory is not found and download is set to False.")

    # The data is located in a GitHub repository as a zipfile.
    # check=True is essential: without it a failed clone (e.g. because `path`
    # already exists and is not empty) would be ignored and the cleanup below
    # would delete whatever unrelated content lives in `path`.
    subprocess.run(["git", "clone", URL, path], check=True)
    # Remove all git files besides the zipfile
    _remove_other_files(path)

    zip_path = os.path.join(path, "data", "data.zip")
    util.unzip(zip_path=zip_path, dst=data_dir)

    return data_dir


def get_bitdepth_nucseg_paths(
    path: Union[os.PathLike, str],
    magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the BitDepth NucSeg data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        magnification: The magnification scale for the input images.
            If None, the images of all magnifications are selected.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_bitdepth_nucseg_data(path, download)

    if magnification is None:
        magnification = "*"
    else:
        # The folder names use a space where the argument uses an underscore,
        # e.g. '40x_air' is stored in the folder '40x air'.
        if magnification.find("_") != -1:
            _splits = magnification.split("_")
            magnification = f"{_splits[0]} {_splits[1]}"

    raw_paths = natsorted(glob(os.path.join(data_dir, magnification, "images_16bit", "*.tif")))
    label_paths = natsorted(glob(os.path.join(data_dir, magnification, "label masks", "*.tif")))

    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0

    return raw_paths, label_paths


def get_bitdepth_nucseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the BitDepth NucSeg dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        magnification: The magnification scale for the input images.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_bitdepth_nucseg_paths(path, magnification, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        **kwargs
    )


def get_bitdepth_nucseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the BitDepth NucSeg dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        magnification: The magnification scale for the input images.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_bitdepth_nucseg_dataset(path, patch_shape, magnification, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://github.com/masih4/BitDepth_NucSeg'
def
get_bitdepth_nucseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_bitdepth_nucseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the BitDepth NucSeg dataset for nucleus segmentation.

    If the folder `<path>/data` already exists it is returned right away and
    nothing is downloaded.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the downloaded data.

    Raises:
        ValueError: If the data is not present and `download` is False.
        subprocess.CalledProcessError: If cloning the repository fails.
    """
    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        return data_dir

    if not download:
        raise ValueError("The data directory is not found and download is set to False.")

    # The data is located in a GitHub repository as a zipfile.
    # check=True is essential: without it a failed clone (e.g. because `path`
    # already exists and is not empty) would be ignored and the cleanup below
    # would delete whatever unrelated content lives in `path`.
    subprocess.run(["git", "clone", URL, path], check=True)
    # Remove all git files besides the zipfile
    _remove_other_files(path)

    zip_path = os.path.join(path, "data", "data.zip")
    util.unzip(zip_path=zip_path, dst=data_dir)

    return data_dir
Download the BitDepth NucSeg dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the downloaded data.
def
get_bitdepth_nucseg_paths( path: Union[os.PathLike, str], magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_bitdepth_nucseg_paths(
    path: Union[os.PathLike, str],
    magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Collect the image and label filepaths of the BitDepth NucSeg data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        magnification: The magnification scale for the input images.
            If None, a wildcard is used so that all magnification folders match.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    root = get_bitdepth_nucseg_data(path, download)

    if magnification is None:
        folder = "*"
    else:
        # The folder names use a space where the argument uses an underscore,
        # e.g. '40x_air' is looked up as '40x air'.
        parts = magnification.split("_")
        folder = f"{parts[0]} {parts[1]}" if len(parts) > 1 else magnification

    def _sorted_tifs(subfolder):
        # Naturally sorted tif files of one sub-folder of the selected magnification(s).
        return natsorted(glob(os.path.join(root, folder, subfolder, "*.tif")))

    raw_paths = _sorted_tifs("images_16bit")
    label_paths = _sorted_tifs("label masks")

    # Every image needs a matching label file, and there must be at least one pair.
    assert len(raw_paths) > 0 and len(raw_paths) == len(label_paths)

    return raw_paths, label_paths
Get paths to the BitDepth NucSeg data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- magnification: The magnification scale for the input images.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_bitdepth_nucseg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_bitdepth_nucseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the BitDepth NucSeg dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        magnification: The magnification scale for the input images.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, mask_paths = get_bitdepth_nucseg_paths(path, magnification, download)

    # No internal keys are passed for the image and label files.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=mask_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
Get the BitDepth NucSeg dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- magnification: The magnification scale for the input images.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_bitdepth_nucseg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_bitdepth_nucseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the BitDepth NucSeg dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        magnification: The magnification scale for the input images.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    nucseg_dataset = get_bitdepth_nucseg_dataset(
        path, patch_shape, magnification, download, **dataset_kwargs
    )
    return torch_em.get_data_loader(nucseg_dataset, batch_size, **dataloader_kwargs)
Get the BitDepth NucSeg dataloader for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- magnification: The magnification scale for the input images.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.