torch_em.data.datasets.light_microscopy.orgasegment
The OrgaSegment dataset contains annotations for organoid segmentation of intestinal patient-derived organoids in bright field images.
This dataset is from the publication https://doi.org/10.1038/s42003-024-05966-4. Please cite it if you use this dataset for your research.
"""The OrgaSegment dataset contains annotations for organoid segmentation
of intestinal patient-derived organoids in bright field images.

This dataset is from the publication https://doi.org/10.1038/s42003-024-05966-4.
Please cite it if you use this dataset for your research.
"""

import os
import shutil
from glob import glob
from typing import Tuple, Union, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://zenodo.org/records/10278229/files/OrganoidBasic_v20211206.zip"
CHECKSUM = "d067124d734108e46e18f65daaf17c89cb0a40bdacc6f6031815a6839e472798"


def get_orgasegment_data(
    path: Union[os.PathLike, str], split: Literal["train", "val", "eval"], download: bool = False
) -> str:
    """Download the OrgaSegment dataset for organoid segmentation.

    The archive holds the 'train', 'val' and 'eval' folders, so one download provides all splits.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to return. Either 'train', 'val' or 'eval'.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the folder with the data of the requested split.

    Raises:
        ValueError: If `split` is not one of 'train', 'val' or 'eval'.
    """
    splits = ("train", "val", "eval")
    # Reject unknown splits up front: they would miss the early return below, trigger a second
    # download, and `shutil.move` would then nest the extracted folders inside the existing ones.
    if split not in splits:
        raise ValueError(f"'{split}' is not a valid split. Choose one of {splits}.")

    os.makedirs(path, exist_ok=True)

    data_dir = os.path.join(path, split)
    if os.path.exists(data_dir):
        return data_dir

    zip_path = os.path.join(path, "OrganoidBasic_v20211206.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path, remove=True)

    # Lift the split folders out of the extracted top-level folder, then drop that folder.
    extracted_dir = os.path.join(path, "OrganoidBasic_v20211206")
    for name in splits:
        shutil.move(os.path.join(extracted_dir, name), os.path.join(path, name))
    shutil.rmtree(extracted_dir)

    return data_dir


def get_orgasegment_paths(
    path: Union[os.PathLike, str], split: Literal["train", "val", "eval"], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths for the OrgaSegment data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use. Either 'train', 'val' or 'eval'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the image data.
        List of filepaths to the label data.
    """
    split_dir = get_orgasegment_data(path=path, split=split, download=download)

    def _collect(pattern: str) -> List[str]:
        # Sorting both lists is what lines up images with their masks.
        return sorted(glob(os.path.join(split_dir, pattern)))

    return _collect("*_img.jpg"), _collect("*_masks_organoid.png")


def get_orgasegment_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "eval"],
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the OrgaSegment dataset for organoid segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The split to use. Either 'train', 'val' or 'eval'.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert split in ["train", "val", "eval"]

    raw_paths, mask_paths = get_orgasegment_paths(path, split, download)

    # Only the updated kwargs are needed; the second return value is discarded.
    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries
    )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=mask_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset


def get_orgasegment_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "eval"],
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the OrgaSegment dataloader for organoid segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The split to use. Either 'train', 'val' or 'eval'.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_orgasegment_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        boundaries=boundaries,
        binary=binary,
        download=download,
        **dataset_kwargs
    )
    loader = torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)
    return loader
URL =
'https://zenodo.org/records/10278229/files/OrganoidBasic_v20211206.zip'
CHECKSUM =
'd067124d734108e46e18f65daaf17c89cb0a40bdacc6f6031815a6839e472798'
def
get_orgasegment_data( path: Union[os.PathLike, str], split: Literal['train', 'val', 'eval'], download: bool = False) -> str:
def get_orgasegment_data(
    path: Union[os.PathLike, str], split: Literal["train", "val", "eval"], download: bool = False
) -> str:
    """Download the OrgaSegment dataset for organoid segmentation.

    The archive holds the 'train', 'val' and 'eval' folders, so one download provides all splits.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to return. Either 'train', 'val' or 'eval'.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the folder with the data of the requested split.

    Raises:
        ValueError: If `split` is not one of 'train', 'val' or 'eval'.
    """
    splits = ("train", "val", "eval")
    # Reject unknown splits up front: they would miss the early return below, trigger a second
    # download, and `shutil.move` would then nest the extracted folders inside the existing ones.
    if split not in splits:
        raise ValueError(f"'{split}' is not a valid split. Choose one of {splits}.")

    os.makedirs(path, exist_ok=True)

    data_dir = os.path.join(path, split)
    if os.path.exists(data_dir):
        return data_dir

    zip_path = os.path.join(path, "OrganoidBasic_v20211206.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path, remove=True)

    # Lift the split folders out of the extracted top-level folder, then drop that folder.
    extracted_dir = os.path.join(path, "OrganoidBasic_v20211206")
    for name in splits:
        shutil.move(os.path.join(extracted_dir, name), os.path.join(path, name))
    shutil.rmtree(extracted_dir)

    return data_dir
Download the OrgaSegment dataset for organoid segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split to download. Either 'train', 'val' or 'eval'.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the folder containing the data of the requested split.
def
get_orgasegment_paths( path: Union[os.PathLike, str], split: Literal['train', 'val', 'eval'], download: bool = False) -> Tuple[List[str], List[str]]:
def get_orgasegment_paths(
    path: Union[os.PathLike, str], split: Literal["train", "val", "eval"], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths for the OrgaSegment data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use. Either 'train', 'val' or 'eval'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths to the image data.
        List of filepaths to the label data.
    """
    split_dir = get_orgasegment_data(path=path, split=split, download=download)

    def _collect(pattern: str) -> List[str]:
        # Sorting both lists is what lines up images with their masks.
        return sorted(glob(os.path.join(split_dir, pattern)))

    return _collect("*_img.jpg"), _collect("*_masks_organoid.png")
Get paths for the OrgaSegment data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split to download. Either 'train', 'val' or 'eval'.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths to the image data. List of filepaths to the label data.
def
get_orgasegment_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val', 'eval'], boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_orgasegment_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "eval"],
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the OrgaSegment dataset for organoid segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The split to use. Either 'train', 'val' or 'eval'.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert split in ["train", "val", "eval"]

    raw_paths, mask_paths = get_orgasegment_paths(path, split, download)

    # Only the updated kwargs are needed; the second return value is discarded.
    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries
    )

    dataset = torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=mask_paths,
        label_key=None,
        patch_shape=patch_shape,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset
Get the OrgaSegment dataset for organoid segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- split: The split to download. Either 'train', 'val' or 'eval'.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_orgasegment_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val', 'eval'], boundaries: bool = False, binary: bool = False, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_orgasegment_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "val", "eval"],
    boundaries: bool = False,
    binary: bool = False,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the OrgaSegment dataloader for organoid segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The split to use. Either 'train', 'val' or 'eval'.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_orgasegment_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        boundaries=boundaries,
        binary=binary,
        download=download,
        **dataset_kwargs
    )
    loader = torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)
    return loader
Get the OrgaSegment dataloader for organoid segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The split to download. Either 'train', 'val' or 'eval'.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.