torch_em.data.datasets.light_microscopy.cellpose
This dataset contains annotations for cell segmentation in fluorescently-labeled microscopy images.
This dataset is from the following publications:
- https://doi.org/10.1038/s41592-020-01018-x
- https://doi.org/10.1038/s41592-022-01663-4
Please cite them if you use this dataset in your research.
"""This dataset contains annotations for cell segmentation in fluorescently-labeled microscopy images.

This dataset is from the following publications:
- https://doi.org/10.1038/s41592-020-01018-x
- https://doi.org/10.1038/s41592-022-01663-4
Please cite them if you use this dataset in your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, Literal, Optional, List

import torch_em

from torch.utils.data import Dataset, DataLoader

from .. import util
from .neurips_cell_seg import to_rgb


AVAILABLE_CHOICES = ["cyto", "cyto2"]


def get_cellpose_data(
    path: Union[os.PathLike, str],
    split: Literal["train", "test"],
    choice: Literal["cyto", "cyto2"],
    download: bool = False,
) -> str:
    """Locate (and, if necessary, unzip) the manually downloaded CellPose data.

    NOTE: The dataset cannot be downloaded automatically.
    Please download the dataset from "https://www.cellpose.org/dataset" and place the zip files in `path`.
    The expected zip files are '<split>.zip' for 'cyto' and 'train_cyto2.zip' for 'cyto2'.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The data split to use. Either 'train', or 'test'. Only 'train' is valid for 'cyto2'.
        choice: The choice of dataset. Either 'cyto' or 'cyto2'.
        download: Whether to download the data if it is not present.
            Automatic download is not supported; this only selects which error is raised if the data is missing.

    Returns:
        The filepath to the folder where the data for the given choice and split is expected after unzipping.

    Raises:
        ValueError: If `choice` is unknown or `split` is not available for `choice`.
        NotImplementedError: If the data and zip file are missing and `download` is True.
        FileNotFoundError: If the data and zip file are missing and `download` is False.
    """
    per_choice_dir = os.path.join(path, choice)  # path where the unzipped files will be stored
    if choice == "cyto":
        if split not in ("train", "test"):
            raise ValueError(f"'{split}' is not a valid split in '{choice}'.")
        zip_path = os.path.join(path, f"{split}.zip")
        data_dir = os.path.join(per_choice_dir, split)  # path where the per split images for 'cyto' exist.
    elif choice == "cyto2":
        if split != "train":
            raise ValueError(f"'{split}' is not a valid split in '{choice}'.")
        zip_path = os.path.join(path, "train_cyto2.zip")
        data_dir = os.path.join(per_choice_dir, "train_cyto2")  # path where 'train' split images for 'cyto2' exist.
    else:
        raise ValueError(f"'{choice}' is not a valid dataset choice.")

    # The data was already extracted in a previous call.
    if os.path.exists(data_dir):
        return data_dir

    # Without the zip file there is nothing to extract: fail with an actionable message
    # instead of handing a non-existent path to the unzip helper.
    if not os.path.exists(zip_path):
        if download:
            raise NotImplementedError(
                "The dataset cannot be automatically downloaded. "
                "Please see 'get_cellpose_data' in 'torch_em/data/datasets/light_microscopy/cellpose.py' for details."
            )
        raise FileNotFoundError(
            f"Could not find the CellPose data at '{data_dir}' or the zip file at '{zip_path}'. "
            "Please download the dataset manually from 'https://www.cellpose.org/dataset'."
        )

    # Keep the zip file (remove=False) so that the manual download does not have to be repeated.
    util.unzip(zip_path=zip_path, dst=per_choice_dir, remove=False)

    return data_dir


def get_cellpose_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'test'],
    choice: Optional[Literal["cyto", "cyto2"]] = None,
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the CellPose data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of dataset. Either 'cyto' or 'cyto2'. Must be specified.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `choice` is not specified.
        RuntimeError: If no images are found or the number of images and labels does not match.
    """
    # 'None' is the signature default but cannot be resolved to a data folder.
    if choice is None:
        raise ValueError("Please specify the dataset 'choice'. Either 'cyto' or 'cyto2'.")

    data_dir = get_cellpose_data(path, split, choice, download)

    # Natural sorting of both lists; images and masks are paired by position.
    image_paths = natsorted(glob(os.path.join(data_dir, "*_img.png")))
    gt_paths = natsorted(glob(os.path.join(data_dir, "*_masks.png")))

    if not image_paths:
        raise RuntimeError(f"No images matching '*_img.png' were found in '{data_dir}'.")
    if len(image_paths) != len(gt_paths):
        raise RuntimeError(
            f"Found {len(image_paths)} images but {len(gt_paths)} label masks in '{data_dir}'."
        )

    return image_paths, gt_paths


def get_cellpose_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "test"],
    choice: Optional[Union[Literal["cyto", "cyto2"], List[Literal["cyto", "cyto2"]]]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the CellPose dataset for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of dataset. Either 'cyto' or 'cyto2', or a list of them.
            By default, all choices that provide the requested split are used.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        ValueError: If `split` or one of the requested choices is invalid.
    """
    if split not in ("train", "test"):
        raise ValueError(f"'{split}' is not a valid split. Choose either 'train' or 'test'.")

    if choice is None:
        # 'get_cellpose_data' only accepts the 'train' split for 'cyto2',
        # so the default for 'test' is restricted to 'cyto'.
        choices = list(AVAILABLE_CHOICES) if split == "train" else ["cyto"]
    elif isinstance(choice, (list, tuple)):
        choices = list(choice)
    else:
        choices = [choice]

    invalid_choices = [per_choice for per_choice in choices if per_choice not in AVAILABLE_CHOICES]
    if invalid_choices:
        raise ValueError(f"Invalid dataset choice(s) {invalid_choices}. Choose from {AVAILABLE_CHOICES}.")

    image_paths, gt_paths = [], []
    for per_choice in choices:
        per_image_paths, per_gt_paths = get_cellpose_paths(path, split, per_choice, download)
        image_paths.extend(per_image_paths)
        gt_paths.extend(per_gt_paths)

    # User-provided transforms take precedence over the defaults.
    if "raw_transform" not in kwargs:
        # Default raw transform with 'to_rgb' passed as 'augmentation2'.
        kwargs["raw_transform"] = torch_em.transform.get_raw_transform(augmentation2=to_rgb)

    if "transform" not in kwargs:
        kwargs["transform"] = torch_em.transform.get_augmentations(ndim=2)

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        ndim=2,
        **kwargs
    )


def get_cellpose_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "test"],
    choice: Optional[Literal["cyto", "cyto2"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the CellPose dataloader for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of dataset. Either 'cyto' or 'cyto2'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Route each keyword argument either to the dataset or to the data loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_cellpose_dataset(path, patch_shape, split, choice, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
AVAILABLE_CHOICES =
['cyto', 'cyto2']
def
get_cellpose_data( path: Union[os.PathLike, str], split: Literal['train', 'test'], choice: Literal['cyto', 'cyto2'], download: bool = False) -> str:
def get_cellpose_data(
    path: Union[os.PathLike, str],
    split: Literal["train", "test"],
    choice: Literal["cyto", "cyto2"],
    download: bool = False,
) -> str:
    """Locate (and, if necessary, unzip) the manually downloaded CellPose data.

    NOTE: The dataset cannot be downloaded automatically.
    Please download the dataset from "https://www.cellpose.org/dataset" and place the zip files in `path`.
    The expected zip files are '<split>.zip' for 'cyto' and 'train_cyto2.zip' for 'cyto2'.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The data split to use. Either 'train', or 'test'. Only 'train' is valid for 'cyto2'.
        choice: The choice of dataset. Either 'cyto' or 'cyto2'.
        download: Whether to download the data if it is not present.
            Automatic download is not supported; this only selects which error is raised if the data is missing.

    Returns:
        The filepath to the folder where the data for the given choice and split is expected after unzipping.

    Raises:
        ValueError: If `choice` is unknown or `split` is not available for `choice`.
        NotImplementedError: If the data and zip file are missing and `download` is True.
        FileNotFoundError: If the data and zip file are missing and `download` is False.
    """
    per_choice_dir = os.path.join(path, choice)  # path where the unzipped files will be stored
    if choice == "cyto":
        if split not in ("train", "test"):
            raise ValueError(f"'{split}' is not a valid split in '{choice}'.")
        zip_path = os.path.join(path, f"{split}.zip")
        data_dir = os.path.join(per_choice_dir, split)  # path where the per split images for 'cyto' exist.
    elif choice == "cyto2":
        if split != "train":
            raise ValueError(f"'{split}' is not a valid split in '{choice}'.")
        zip_path = os.path.join(path, "train_cyto2.zip")
        data_dir = os.path.join(per_choice_dir, "train_cyto2")  # path where 'train' split images for 'cyto2' exist.
    else:
        raise ValueError(f"'{choice}' is not a valid dataset choice.")

    # The data was already extracted in a previous call.
    if os.path.exists(data_dir):
        return data_dir

    # Without the zip file there is nothing to extract: fail with an actionable message
    # instead of handing a non-existent path to the unzip helper.
    if not os.path.exists(zip_path):
        if download:
            raise NotImplementedError(
                "The dataset cannot be automatically downloaded. "
                "Please see 'get_cellpose_data' in 'torch_em/data/datasets/light_microscopy/cellpose.py' for details."
            )
        raise FileNotFoundError(
            f"Could not find the CellPose data at '{data_dir}' or the zip file at '{zip_path}'. "
            "Please download the dataset manually from 'https://www.cellpose.org/dataset'."
        )

    # Keep the zip file (remove=False) so that the manual download does not have to be repeated.
    util.unzip(zip_path=zip_path, dst=per_choice_dir, remove=False)

    return data_dir
Instruction to download CellPose data.
NOTE: Please download the dataset from "https://www.cellpose.org/dataset".
Arguments:
- path: Filepath to a folder where the data is downloaded for further processing.
- split: The data split to use. Either 'train', or 'test'.
- choice: The choice of dataset. Either 'cyto' or 'cyto2'.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the folder where the data is manually downloaded.
def
get_cellpose_paths( path: Union[os.PathLike, str], split: Literal['train', 'test'], choice: Optional[Literal['cyto', 'cyto2']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_cellpose_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'test'],
    choice: Optional[Literal["cyto", "cyto2"]] = None,
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Get paths to the CellPose data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of dataset. Either 'cyto' or 'cyto2'. Must be specified.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `choice` is not specified.
        RuntimeError: If no images are found or the number of images and labels does not match.
    """
    # 'None' is the signature default but cannot be resolved to a data folder.
    if choice is None:
        raise ValueError("Please specify the dataset 'choice'. Either 'cyto' or 'cyto2'.")

    data_dir = get_cellpose_data(path, split, choice, download)

    # Natural sorting of both lists; images and masks are paired by position.
    image_paths = natsorted(glob(os.path.join(data_dir, "*_img.png")))
    gt_paths = natsorted(glob(os.path.join(data_dir, "*_masks.png")))

    if not image_paths:
        raise RuntimeError(f"No images matching '*_img.png' were found in '{data_dir}'.")
    if len(image_paths) != len(gt_paths):
        raise RuntimeError(
            f"Found {len(image_paths)} images but {len(gt_paths)} label masks in '{data_dir}'."
        )

    return image_paths, gt_paths
Get paths to the CellPose data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The data split to use. Either 'train', or 'test'.
- choice: The choice of dataset. Either 'cyto' or 'cyto2'.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_cellpose_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'test'], choice: Optional[Literal['cyto', 'cyto2']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_cellpose_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal["train", "test"],
    choice: Optional[Union[Literal["cyto", "cyto2"], List[Literal["cyto", "cyto2"]]]] = None,
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the CellPose dataset for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of dataset. Either 'cyto' or 'cyto2', or a list of them.
            By default, all choices that provide the requested split are used.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        ValueError: If `split` or one of the requested choices is invalid.
    """
    if split not in ("train", "test"):
        raise ValueError(f"'{split}' is not a valid split. Choose either 'train' or 'test'.")

    if choice is None:
        # 'get_cellpose_data' only accepts the 'train' split for 'cyto2',
        # so the default for 'test' is restricted to 'cyto'.
        choices = list(AVAILABLE_CHOICES) if split == "train" else ["cyto"]
    elif isinstance(choice, (list, tuple)):
        choices = list(choice)
    else:
        choices = [choice]

    invalid_choices = [per_choice for per_choice in choices if per_choice not in AVAILABLE_CHOICES]
    if invalid_choices:
        raise ValueError(f"Invalid dataset choice(s) {invalid_choices}. Choose from {AVAILABLE_CHOICES}.")

    image_paths, gt_paths = [], []
    for per_choice in choices:
        per_image_paths, per_gt_paths = get_cellpose_paths(path, split, per_choice, download)
        image_paths.extend(per_image_paths)
        gt_paths.extend(per_gt_paths)

    # User-provided transforms take precedence over the defaults.
    if "raw_transform" not in kwargs:
        # Default raw transform with 'to_rgb' passed as 'augmentation2'.
        kwargs["raw_transform"] = torch_em.transform.get_raw_transform(augmentation2=to_rgb)

    if "transform" not in kwargs:
        kwargs["transform"] = torch_em.transform.get_augmentations(ndim=2)

    return torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        raw_key=None,
        label_paths=gt_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        ndim=2,
        **kwargs
    )
Get the CellPose dataset for cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train', or 'test'.
- choice: The choice of dataset. Either 'cyto' or 'cyto2'.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_cellpose_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'test'], choice: Optional[Literal['cyto', 'cyto2']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_cellpose_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "test"],
    choice: Optional[Literal["cyto", "cyto2"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build a PyTorch DataLoader over the CellPose cell segmentation dataset.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of dataset. Either 'cyto' or 'cyto2'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    cellpose_dataset = get_cellpose_dataset(
        path=path,
        patch_shape=patch_shape,
        split=split,
        choice=choice,
        download=download,
        **dataset_kwargs
    )

    return torch_em.get_data_loader(cellpose_dataset, batch_size, **dataloader_kwargs)
Get the CellPose dataloader for cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The data split to use. Either 'train', or 'test'.
- choice: The choice of dataset. Either 'cyto' or 'cyto2'.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.