torch_em.data.datasets.light_microscopy.brifiseg
The BriFiSeg dataset contains annotations for nuclei segmentation in brightfield images.
The dataset is located at https://zenodo.org/records/7195636. This dataset is from the publication https://doi.org/10.48550/arXiv.2211.03072. Please cite it if you use this dataset for your research.
"""The BriFiSeg dataset contains annotations for nuclei segmentation in brightfield images.

The dataset is located at https://zenodo.org/records/7195636.
This dataset is from the publication https://doi.org/10.48550/arXiv.2211.03072.
Please cite it if you use this dataset for your research.
"""

import os
import shutil
from glob import glob
from tqdm import tqdm
from natsort import natsorted
from typing import Union, Tuple, List, Literal, Optional

from skimage.measure import label as connected_components

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://zenodo.org/records/7195636/files/1channel.tar"
CHECKSUM = "2be173c265ab737957dd6c007cc51a5ad528087d23cecc53b211cf4dcc7727fa"


def _preprocess_data(path, data_dir):
    """Convert the extracted NIfTI images and labels into one HDF5 file per image.

    Images and labels are collected from `<path>/1channel/Task*/images*` and
    `<path>/1channel/Task*/labels*` and paired by their position after natural sorting.
    Each pair is written to `<data_dir>/<split>/<name>.h5` with the datasets "raw" and "labels".

    Args:
        path: Folder that contains the extracted "1channel" directory.
        data_dir: Folder where the HDF5 files are written (one subfolder per split).

    Raises:
        RuntimeError: If no images are found or the number of images and labels differs.
    """
    import h5py
    import nibabel as nib

    raw_paths = natsorted(glob(os.path.join(path, "1channel", "Task*", "images*", "*.nii.gz")))
    label_paths = natsorted(glob(os.path.join(path, "1channel", "Task*", "labels*", "*.nii.gz")))

    # Explicit check instead of `assert`, which is stripped under `python -O` and would let
    # `zip` silently drop unpaired files.
    if not raw_paths or len(raw_paths) != len(label_paths):
        raise RuntimeError(
            f"Expected a matching, non-empty set of images and labels in '{path}', "
            f"but found {len(raw_paths)} images and {len(label_paths)} labels."
        )

    for rpath, lpath in tqdm(zip(raw_paths, label_paths), total=len(raw_paths), desc="Preprocess inputs"):
        # Drop the trailing singleton axis of the stored volumes.
        raw = nib.load(rpath).get_fdata().squeeze(-1)
        labels = nib.load(lpath).get_fdata().squeeze(-1)
        labels = (labels > 0).astype("uint32")  # binarise all nuclei
        labels = connected_components(labels).astype(labels.dtype)  # running connected components

        # Use os.path helpers rather than splitting on "/" so that the parent folder
        # is found regardless of the platform's path separator.
        label_dir, label_file = os.path.split(lpath)
        fname = label_file.split(".")[0]
        split = "train" if os.path.basename(label_dir) == "labelsTr" else "test"

        os.makedirs(os.path.join(data_dir, split), exist_ok=True)
        with h5py.File(os.path.join(data_dir, split, f"{fname}.h5"), "w") as f:
            f.create_dataset("raw", data=raw)
            f.create_dataset("labels", data=labels)


def get_brifiseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download and preprocess the BriFiSeg data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath to the folder where the preprocessed data is stored.
    """
    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    tar_path = os.path.join(path, "1channel.tar")
    util.download_source(path=tar_path, url=URL, checksum=CHECKSUM, download=download)
    util.unzip_tarfile(tar_path=tar_path, dst=path)

    extracted_dir = os.path.join(path, "1channel")
    for zip_path in glob(os.path.join(extracted_dir, "*.zip")):
        util.unzip(zip_path=zip_path, dst=extracted_dir)

    # Preprocess into a temporary folder and rename it once finished. The existence of
    # `data_dir` is what marks the data as ready (see the early return above), so it must
    # never be visible in a half-written state after an interrupted run.
    tmp_dir = os.path.join(path, "data.partial")
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)  # leftover from a previously interrupted run
    _preprocess_data(path, tmp_dir)
    os.rename(tmp_dir, data_dir)

    return data_dir


def get_brifiseg_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'test'],
    cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None,
    download: bool = False
) -> List[str]:
    """Get paths to the BriFiSeg data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The choice of data split.
        cell_type: The choice of cell type. By default, the files of all cell types are returned.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the input data.

    Raises:
        ValueError: If `split` is neither 'train' nor 'test'.
    """
    # Validate the cheap argument first so that an invalid split does not
    # trigger the download and preprocessing.
    if split not in ('train', 'test'):
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_brifiseg_data(path, download)

    if cell_type is None:
        cell_type = "*"

    input_paths = natsorted(glob(os.path.join(data_dir, split, f"{cell_type}_*.h5")))
    return input_paths


def get_brifiseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the BriFiSeg dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        cell_type: The choice of cell type.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    input_paths = get_brifiseg_paths(path, split, cell_type, download)

    # Each HDF5 file holds both the image ("raw") and its annotation ("labels").
    return torch_em.default_segmentation_dataset(
        raw_paths=input_paths,
        raw_key="raw",
        label_paths=input_paths,
        label_key="labels",
        patch_shape=patch_shape,
        **kwargs
    )


def get_brifiseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the BriFiSeg dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size passed to the DataLoader.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        cell_type: The choice of cell type.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_brifiseg_dataset(path, patch_shape, split, cell_type, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL = 'https://zenodo.org/records/7195636/files/1channel.tar'
CHECKSUM = '2be173c265ab737957dd6c007cc51a5ad528087d23cecc53b211cf4dcc7727fa'
def get_brifiseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_brifiseg_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download and preprocess the BriFiSeg data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath to the folder where the preprocessed data is stored.
    """
    import shutil

    data_dir = os.path.join(path, "data")
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    tar_path = os.path.join(path, "1channel.tar")
    util.download_source(path=tar_path, url=URL, checksum=CHECKSUM, download=download)
    util.unzip_tarfile(tar_path=tar_path, dst=path)

    extracted_dir = os.path.join(path, "1channel")
    for zip_path in glob(os.path.join(extracted_dir, "*.zip")):
        util.unzip(zip_path=zip_path, dst=extracted_dir)

    # Preprocess into a temporary folder and rename it once finished. The existence of
    # `data_dir` is what marks the data as ready (see the early return above), so it must
    # never be visible in a half-written state after an interrupted run.
    tmp_dir = os.path.join(path, "data.partial")
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)  # leftover from a previously interrupted run
    _preprocess_data(path, tmp_dir)
    os.rename(tmp_dir, data_dir)

    return data_dir
Download the BriFiSeg data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
Filepath to the folder where the preprocessed data is stored.
def get_brifiseg_paths(path: Union[os.PathLike, str], split: Literal['train', 'test'], cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None, download: bool = False) -> List[str]:
def get_brifiseg_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'test'],
    cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None,
    download: bool = False
) -> List[str]:
    """Get paths to the BriFiSeg data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The choice of data split.
        cell_type: The choice of cell type. By default, the files of all cell types are returned.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the input data.

    Raises:
        ValueError: If `split` is neither 'train' nor 'test'.
    """
    # Validate the cheap argument first so that an invalid split does not
    # trigger the download and preprocessing.
    if split not in ('train', 'test'):
        raise ValueError(f"'{split}' is not a valid split.")

    data_dir = get_brifiseg_data(path, download)

    # Without an explicit cell type, match the files of every cell type.
    if cell_type is None:
        cell_type = "*"

    input_paths = natsorted(glob(os.path.join(data_dir, split, f"{cell_type}_*.h5")))
    return input_paths
Get paths to the preprocessed BriFiSeg data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The choice of data split.
- cell_type: The choice of cell type.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the input data.
def get_brifiseg_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'test'], cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_brifiseg_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the BriFiSeg segmentation dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        cell_type: The choice of cell type.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    volume_paths = get_brifiseg_paths(path=path, split=split, cell_type=cell_type, download=download)

    # The same files serve as raw and label source; only the dataset key differs.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=volume_paths, raw_key="raw",
        label_paths=volume_paths, label_key="labels",
        patch_shape=patch_shape,
        **kwargs
    )
    return dataset
Get the BriFiSeg dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- cell_type: The choice of cell type.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_brifiseg_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'test'], cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_brifiseg_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    cell_type: Optional[Literal['A549', 'HELA', 'MCF7', 'RPE1']] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the BriFiSeg dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size passed to the DataLoader.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        cell_type: The choice of cell type.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    brifiseg_dataset = get_brifiseg_dataset(
        path, patch_shape, split, cell_type, download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(brifiseg_dataset, batch_size, **dataloader_kwargs)
    return loader
Get the BriFiSeg dataloader for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size passed to the DataLoader.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- cell_type: The choice of cell type.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.