torch_em.data.datasets.light_microscopy.bitdepth_nucseg

The BitDepth NucSeg dataset contains annotations for nucleus segmentation in DAPI stained fluorescence microscopy images.

The dataset is located at https://github.com/masih4/BitDepth_NucSeg/. This dataset is from the publication https://doi.org/10.3390/diagnostics11060967. Please cite it if you use this dataset in your research.

  1"""The BitDepth NucSeg dataset contains annotations for nucleus segmentation
  2in DAPI stained fluorescence microscopy images.
  3
  4The dataset is located at https://github.com/masih4/BitDepth_NucSeg/
  5This dataset is from the publication https://doi.org/10.3390/diagnostics11060967.
  6Please cite it if you use this dataset in your research.
  7"""
  8
  9import os
 10import shutil
 11import subprocess
 12from glob import glob
 13from natsort import natsorted
 14from typing import Union, Tuple, Optional, Literal, List
 15
 16from torch.utils.data import Dataset, DataLoader
 17
 18import torch_em
 19
 20from .. import util
 21
 22
 23URL = "https://github.com/masih4/BitDepth_NucSeg"
 24
 25
 26def _remove_other_files(path):
 27    "Remove other files from the git repository"
 28    all_files = glob(os.path.join(path, "*"))
 29    all_files.extend(glob(os.path.join(path, ".*")))
 30    for _file in all_files:
 31        if os.path.basename(_file) == "data":
 32            continue
 33
 34        if os.path.isdir(_file):
 35            shutil.rmtree(_file)
 36        else:
 37            os.remove(_file)
 38
 39
 40def get_bitdepth_nucseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 41    """Download the BitDepth NucSeg dataset for nucleus segmentation.
 42
 43    Args:
 44        path: Filepath to a folder where the downloaded data will be saved.
 45        download: Whether to download the data if it is not present.
 46
 47    Returns:
 48        The filepath to the downloaded data.
 49    """
 50    data_dir = os.path.join(path, "data")
 51    if os.path.exists(data_dir):
 52        return data_dir
 53
 54    if not download:
 55        raise ValueError("The data directory is not found and download is set to False.")
 56
 57    # The data is located in a GitHub repository as a zipfile.
 58    subprocess.run(["git", "clone", URL, path])
 59    # Remove all git files besides the zipfile
 60    _remove_other_files(path)
 61
 62    zip_path = os.path.join(path, "data", "data.zip")
 63    util.unzip(zip_path=zip_path, dst=data_dir)
 64
 65    return data_dir
 66
 67
 68def get_bitdepth_nucseg_paths(
 69    path: Union[os.PathLike, str],
 70    magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None,
 71    download: bool = False
 72) -> Tuple[List[str], List[str]]:
 73    """Get paths to the BitDepth NucSeg data.
 74
 75    Args:
 76        path: Filepath to a folder where the downloaded data will be saved.
 77        magnification: The magnification scale for the input images.
 78        download: Whether to download the data if it is not present.
 79
 80    Returns:
 81        List of filepaths for the image data.
 82        List of filepaths for the label data.
 83    """
 84    data_dir = get_bitdepth_nucseg_data(path, download)
 85
 86    if magnification is None:
 87        magnification = "*"
 88    else:
 89        if magnification.find("_") != -1:
 90            _splits = magnification.split("_")
 91            magnification = f"{_splits[0]} {_splits[1]}"
 92
 93    raw_paths = natsorted(glob(os.path.join(data_dir, magnification, "images_16bit", "*.tif")))
 94    label_paths = natsorted(glob(os.path.join(data_dir, magnification, "label masks", "*.tif")))
 95
 96    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0
 97
 98    return raw_paths, label_paths
 99
100
def get_bitdepth_nucseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the segmentation dataset for the BitDepth NucSeg nucleus data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        magnification: The magnification scale for the input images.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, mask_paths = get_bitdepth_nucseg_paths(
        path=path, magnification=magnification, download=download
    )

    # Images and labels are passed as plain file lists, without internal dataset keys.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=mask_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset
131
132
def get_bitdepth_nucseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the dataloader for the BitDepth NucSeg nucleus data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        magnification: The magnification scale for the input images.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones, which go to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_bitdepth_nucseg_dataset(
        path=path, patch_shape=patch_shape, magnification=magnification, download=download, **dataset_kwargs
    )

    loader = torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)
    return loader
URL = 'https://github.com/masih4/BitDepth_NucSeg'
def get_bitdepth_nucseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_bitdepth_nucseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the BitDepth NucSeg dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the downloaded data.

    Raises:
        ValueError: If the data is not present and `download` is False.
        subprocess.CalledProcessError: If cloning the repository fails.
    """
    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        return data_dir

    if not download:
        raise ValueError("The data directory is not found and download is set to False.")

    # The data is located in a GitHub repository as a zipfile.
    # `check=True` makes a failed clone (no network, missing git, non-empty target folder)
    # fail right here instead of surfacing later as a missing zipfile.
    subprocess.run(["git", "clone", URL, os.fspath(path)], check=True)
    # Remove all git files besides the zipfile
    _remove_other_files(path)

    zip_path = os.path.join(path, "data", "data.zip")
    util.unzip(zip_path=zip_path, dst=data_dir)

    return data_dir

Download the BitDepth NucSeg dataset for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the downloaded data.

def get_bitdepth_nucseg_paths( path: Union[os.PathLike, str], magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_bitdepth_nucseg_paths(
    path: Union[os.PathLike, str],
    magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None,
    download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the BitDepth NucSeg data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        magnification: The magnification scale for the input images.
            If None, the data for all magnifications is selected.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If no images are found or the number of images and labels differs.
    """
    data_dir = get_bitdepth_nucseg_data(path, download)

    if magnification is None:
        # Wildcard: match the folders of all magnifications.
        folder_pattern = "*"
    else:
        # The folder lookup uses a space where the argument uses an underscore, e.g. '40x_air' -> '40x air'.
        folder_pattern = " ".join(magnification.split("_")[:2])

    raw_paths = natsorted(glob(os.path.join(data_dir, folder_pattern, "images_16bit", "*.tif")))
    label_paths = natsorted(glob(os.path.join(data_dir, folder_pattern, "label masks", "*.tif")))

    # Raised explicitly instead of via `assert`, so that the check is not stripped under `python -O`.
    # The exception type is kept as AssertionError for backward compatibility.
    if not raw_paths or len(raw_paths) != len(label_paths):
        raise AssertionError(
            f"Expected a matching, non-zero number of images and labels for '{folder_pattern}' in '{data_dir}', "
            f"but found {len(raw_paths)} images and {len(label_paths)} labels."
        )

    return raw_paths, label_paths

Get paths to the BitDepth NucSeg data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • magnification: The magnification scale for the input images.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_bitdepth_nucseg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_bitdepth_nucseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Build the segmentation dataset for the BitDepth NucSeg nucleus data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        magnification: The magnification scale for the input images.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, mask_paths = get_bitdepth_nucseg_paths(
        path=path, magnification=magnification, download=download
    )

    # Images and labels are passed as plain file lists, without internal dataset keys.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=mask_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset

Get the BitDepth NucSeg dataset for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • magnification: The magnification scale for the input images.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_bitdepth_nucseg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_bitdepth_nucseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    magnification: Optional[Literal['20x', '40x_air', '40x_oil', '63x_oil']] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the dataloader for the BitDepth NucSeg nucleus data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        magnification: The magnification scale for the input images.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining ones, which go to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dataset = get_bitdepth_nucseg_dataset(
        path=path, patch_shape=patch_shape, magnification=magnification, download=download, **dataset_kwargs
    )

    loader = torch_em.get_data_loader(dataset, batch_size, **dataloader_kwargs)
    return loader

Get the BitDepth NucSeg dataloader for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • magnification: The magnification scale for the input images.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.