torch_em.data.datasets.light_microscopy.brain_organoids
The Brain Organoids dataset contains annotations for organoid segmentation in 2d brightfield images of brain organoids.
This dataset is from the publication https://doi.org/10.1038/s41597-024-03330-z. Please cite it if you use this dataset for your research.
"""The Brain Organoids dataset contains annotations for organoid segmentation in
2d brightfield images of brain organoids.

This dataset is from the publication https://doi.org/10.1038/s41597-024-03330-z.
Please cite it if you use this dataset for your research.
"""

import os
from glob import glob
from tqdm import tqdm
from natsort import natsorted
from typing import Union, Tuple, List

import numpy as np
import imageio.v3 as imageio

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util
from .neurips_cell_seg import to_rgb


URL = "https://zenodo.org/records/10301912/files/data.zip"
CHECKSUM = "bc2ed56717a65ccd49e27bac92c3b714ca4bb245299698b68baa599c9d510a26"


def get_brain_organoids_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download the Brain Organoids dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath where the data is downloaded.
    """
    data_dir = os.path.join(path, "data")
    # An existing "data" folder is treated as an already extracted download.
    if os.path.exists(data_dir):
        return data_dir

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "data.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)

    return data_dir


def get_brain_organoids_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the Brain Organoids data.

    The labels are stored as `.npy` files. Each one is converted to a `.tif` file in a
    `labels_preprocessed` folder inside the data folder; files that already exist are reused.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_brain_organoids_data(path, download)

    raw_paths = natsorted(glob(os.path.join(data_dir, "imgs", "*")))
    label_paths = natsorted(glob(os.path.join(data_dir, "labels", "*.npy")))

    preprocessed_dir = os.path.join(data_dir, "labels_preprocessed")
    os.makedirs(preprocessed_dir, exist_ok=True)

    neu_label_paths = []
    for lpath in tqdm(label_paths, desc="Preprocessing labels"):
        # Derive the target from the file name only: a `str.replace` on the full path would
        # also rewrite "labels" or ".npy" wherever they occur in the user-supplied root folder.
        stem = os.path.splitext(os.path.basename(lpath))[0]
        neu_lpath = os.path.join(preprocessed_dir, stem + ".tif")
        neu_label_paths.append(neu_lpath)
        if os.path.exists(neu_lpath):
            continue

        label = np.load(lpath)
        imageio.imwrite(neu_lpath, label)

    return raw_paths, neu_label_paths


def get_brain_organoids_dataset(
    path: Union[os.PathLike, str], patch_shape: Tuple[int, int], download: bool = False, **kwargs
) -> Dataset:
    """Get the Brain Organoids dataset for organoid segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_brain_organoids_paths(path, download)

    # A caller-provided raw transform takes precedence over the default one built with `to_rgb`.
    if "raw_transform" not in kwargs:
        kwargs["raw_transform"] = torch_em.transform.get_raw_transform(augmentation2=to_rgb)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=False,
        with_channels=True,
        patch_shape=patch_shape,
        **kwargs
    )


def get_brain_organoids_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], download: bool = False, **kwargs
) -> DataLoader:
    """Get the Brain Organoids dataloader for organoid segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for the DataLoader.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_brain_organoids_dataset(path, patch_shape, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://zenodo.org/records/10301912/files/data.zip'
CHECKSUM =
'bc2ed56717a65ccd49e27bac92c3b714ca4bb245299698b68baa599c9d510a26'
def
get_brain_organoids_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_brain_organoids_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Download and extract the Brain Organoids dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        Filepath of the `data` folder inside `path` that holds the extracted data.
    """
    data_dir = os.path.join(path, "data")

    # An existing "data" folder is treated as an already extracted download,
    # so the archive is only fetched and unpacked when that folder is missing.
    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)
        zip_path = os.path.join(path, "data.zip")
        util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=zip_path, dst=path)

    return data_dir
Download the Brain Organoids dataset.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is downloaded.
def
get_brain_organoids_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
def get_brain_organoids_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the Brain Organoids data.

    The labels are stored as `.npy` files. Each one is converted to a `.tif` file in a
    `labels_preprocessed` folder inside the data folder; files that already exist are reused.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    data_dir = get_brain_organoids_data(path, download)

    raw_paths = natsorted(glob(os.path.join(data_dir, "imgs", "*")))
    label_paths = natsorted(glob(os.path.join(data_dir, "labels", "*.npy")))

    preprocessed_dir = os.path.join(data_dir, "labels_preprocessed")
    os.makedirs(preprocessed_dir, exist_ok=True)

    neu_label_paths = []
    for lpath in tqdm(label_paths, desc="Preprocessing labels"):
        # Derive the target from the file name only: a `str.replace` on the full path would
        # also rewrite "labels" or ".npy" wherever they occur in the user-supplied root folder,
        # pointing the output at a directory that was never created.
        stem = os.path.splitext(os.path.basename(lpath))[0]
        neu_lpath = os.path.join(preprocessed_dir, stem + ".tif")
        neu_label_paths.append(neu_lpath)
        if os.path.exists(neu_lpath):
            continue

        label = np.load(lpath)
        imageio.imwrite(neu_lpath, label)

    return raw_paths, neu_label_paths
Get the Brain Organoids data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_brain_organoids_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_brain_organoids_dataset(
    path: Union[os.PathLike, str], patch_shape: Tuple[int, int], download: bool = False, **kwargs
) -> Dataset:
    """Get the Brain Organoids dataset for organoid segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
            If `raw_transform` is not given, a raw transform built with `to_rgb` is used.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_brain_organoids_paths(path, download)

    # A caller-provided raw transform takes precedence over the default one built with `to_rgb`.
    if "raw_transform" not in kwargs:
        kwargs["raw_transform"] = torch_em.transform.get_raw_transform(augmentation2=to_rgb)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=False,
        with_channels=True,
        patch_shape=patch_shape,
        **kwargs
    )
Get the Brain Organoids dataset for organoid segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_brain_organoids_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_brain_organoids_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], download: bool = False, **kwargs
) -> DataLoader:
    """Get the Brain Organoids dataloader for organoid segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for the DataLoader.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_brain_organoids_dataset(path, patch_shape, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
Get the Brain Organoids dataloader for organoid segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for the DataLoader.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.