torch_em.data.datasets.light_microscopy.brifiseg

The BriFiSeg dataset contains annotations for nuclei segmentation in brightfield images.

The dataset is located at https://zenodo.org/records/7195636. This dataset is from the publication https://doi.org/10.48550/arXiv.2211.03072. Please cite it if you use this dataset for your research.

  1"""The BriFiSeg dataset contains annotations for nuclei segmentation in brightfield images.
  2
  3The dataset is located at https://zenodo.org/records/7195636.
  4This dataset is from the publication https://doi.org/10.48550/arXiv.2211.03072.
  5Please cite it if you use this dataset for your research.
  6"""
  7
  8import os
  9from glob import glob
 10from tqdm import tqdm
 11from natsort import natsorted
 12from typing import Union, Tuple, List, Literal, Optional
 13
 14from skimage.measure import label as connected_components
 15
 16from torch.utils.data import Dataset, DataLoader
 17
 18import torch_em
 19
 20from .. import util
 21
 22
 23URL = "https://zenodo.org/records/7195636/files/1channel.tar"
 24CHECKSUM = "2be173c265ab737957dd6c007cc51a5ad528087d23cecc53b211cf4dcc7727fa"
 25
 26
 27def _preprocess_data(path, data_dir):
 28    import h5py
 29    import nibabel as nib
 30
 31    raw_paths = natsorted(glob(os.path.join(path, "1channel", "Task*", "images*", "*.nii.gz")))
 32    label_paths = natsorted(glob(os.path.join(path, "1channel", "Task*", "labels*", "*.nii.gz")))
 33
 34    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0
 35
 36    for rpath, lpath in tqdm(zip(raw_paths, label_paths), total=len(raw_paths), desc="Preprocess inputs"):
 37        raw = nib.load(rpath).get_fdata().squeeze(-1)
 38        labels = nib.load(lpath).get_fdata().squeeze(-1)
 39        labels = (labels > 0).astype("uint32")  # binarise all nuclei
 40        labels = connected_components(labels).astype(labels.dtype)  # running connected components
 41
 42        fsplit = lpath.split("/")
 43        fname = fsplit[-1].split(".")[0]
 44        split = "train" if fsplit[-2] == "labelsTr" else "test"
 45
 46        os.makedirs(os.path.join(data_dir, split), exist_ok=True)
 47        with h5py.File(os.path.join(data_dir, split, f"{fname}.h5"), "w") as f:
 48            f.create_dataset("raw", data=raw)
 49            f.create_dataset("labels", data=labels)
 50
 51
 52def get_brifiseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 53    """Download the BriFiSeg data.
 54
 55    Args:
 56        path: Filepath to a folder where the downloaded data will be saved.
 57        download: Whether to download the data if it is not present.
 58
 59    Returns:
 60        Filepath where the data is downloaded.
 61    """
 62    data_dir = os.path.join(path, "data")
 63    if os.path.exists(data_dir):
 64        return data_dir
 65
 66    os.makedirs(path, exist_ok=True)
 67
 68    tar_path = os.path.join(path, "1channel.tar")
 69    util.download_source(path=tar_path, url=URL, checksum=CHECKSUM, download=download)
 70    util.unzip_tarfile(tar_path=tar_path, dst=path)
 71
 72    for zip_path in glob(os.path.join(os.path.join(path, "1channel"), "*.zip")):
 73        util.unzip(zip_path=zip_path, dst=os.path.join(path, "1channel"))
 74
 75    _preprocess_data(path, data_dir)
 76
 77    return data_dir
 78
 79
 80def get_brifiseg_paths(
 81    path: Union[os.PathLike, str],
 82    split: Literal['train', 'test'],
 83    cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None,
 84    download: bool = False
 85) -> List[str]:
 86    """Get the BriFiSeg data.
 87
 88    Args:
 89        path: Filepath to a folder where the downloaded data will be saved.
 90        split: The choice of data split.
 91        cell_type: The choice of cell type.
 92        download: Whether to download the data if it is not present.
 93
 94    Returns:
 95        List of filepaths for the input data.
 96    """
 97    data_dir = get_brifiseg_data(path, download)
 98
 99    if split not in ['train', 'test']:
100        raise ValueError(f"'{split}' is not a valid split.")
101
102    if cell_type is None:
103        cell_type = "*"
104
105    input_paths = natsorted(glob(os.path.join(data_dir, split, f"{cell_type}_*.h5")))
106    return input_paths
107
108
def get_brifiseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the BriFiSeg dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        cell_type: The choice of cell type.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # Each file holds both the image ("raw") and its annotation ("labels"),
    # so the same list of paths is used for inputs and labels.
    h5_paths = get_brifiseg_paths(path=path, split=split, cell_type=cell_type, download=download)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=h5_paths, raw_key="raw", label_paths=h5_paths, label_key="labels", patch_shape=patch_shape, **kwargs
    )
    return dataset
140
141
def get_brifiseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the BriFiSeg dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        cell_type: The choice of cell type.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_brifiseg_dataset(
        path=path, patch_shape=patch_shape, split=split, cell_type=cell_type, download=download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)
    return loader
# Download link for the `1channel.tar` archive of the dataset, hosted on Zenodo.
URL = 'https://zenodo.org/records/7195636/files/1channel.tar'
# Expected checksum of the downloaded archive; handed to `util.download_source` together with URL.
CHECKSUM = '2be173c265ab737957dd6c007cc51a5ad528087d23cecc53b211cf4dcc7727fa'
def get_brifiseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_brifiseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download and preprocess the BriFiSeg data.

    If the folder `<path>/data` already exists it is returned immediately.
    Otherwise the archive is fetched, extracted, and converted with `_preprocess_data`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath to the folder that contains the preprocessed data.
    """
    import shutil

    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    tar_path = os.path.join(path, "1channel.tar")
    util.download_source(path=tar_path, url=URL, checksum=CHECKSUM, download=download)
    util.unzip_tarfile(tar_path=tar_path, dst=path)

    extracted_dir = os.path.join(path, "1channel")
    for zip_path in glob(os.path.join(extracted_dir, "*.zip")):
        util.unzip(zip_path=zip_path, dst=extracted_dir)

    try:
        _preprocess_data(path, data_dir)
    except BaseException:
        # The existence of `data_dir` is the only marker for "preprocessing finished" (see the
        # early return above). Remove a partially written folder so that the next call redoes
        # the conversion instead of silently returning an incomplete dataset.
        shutil.rmtree(data_dir, ignore_errors=True)
        raise

    return data_dir

Download the BriFiSeg data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def get_brifiseg_paths( path: Union[os.PathLike, str], split: Literal['train', 'test'], cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None, download: bool = False) -> List[str]:
 81def get_brifiseg_paths(
 82    path: Union[os.PathLike, str],
 83    split: Literal['train', 'test'],
 84    cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None,
 85    download: bool = False
 86) -> List[str]:
 87    """Get the BriFiSeg data.
 88
 89    Args:
 90        path: Filepath to a folder where the downloaded data will be saved.
 91        split: The choice of data split.
 92        cell_type: The choice of cell type.
 93        download: Whether to download the data if it is not present.
 94
 95    Returns:
 96        List of filepaths for the input data.
 97    """
 98    data_dir = get_brifiseg_data(path, download)
 99
100    if split not in ['train', 'test']:
101        raise ValueError(f"'{split}' is not a valid split.")
102
103    if cell_type is None:
104        cell_type = "*"
105
106    input_paths = natsorted(glob(os.path.join(data_dir, split, f"{cell_type}_*.h5")))
107    return input_paths

Get paths to the BriFiSeg data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The choice of data split.
  • cell_type: The choice of cell type.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the input data.

def get_brifiseg_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'test'], cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_brifiseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the BriFiSeg dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        cell_type: The choice of cell type.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # Each file holds both the image ("raw") and its annotation ("labels"),
    # so the same list of paths is used for inputs and labels.
    h5_paths = get_brifiseg_paths(path=path, split=split, cell_type=cell_type, download=download)

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=h5_paths, raw_key="raw", label_paths=h5_paths, label_key="labels", patch_shape=patch_shape, **kwargs
    )
    return dataset

Get the BriFiSeg dataset for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • split: The choice of data split.
  • cell_type: The choice of cell type.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_brifiseg_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'test'], cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_brifiseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the BriFiSeg dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        cell_type: The choice of cell type.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    ds = get_brifiseg_dataset(
        path=path, patch_shape=patch_shape, split=split, cell_type=cell_type, download=download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)
    return loader

Get the BriFiSeg dataloader for nucleus segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The choice of data split.
  • cell_type: The choice of cell type.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.