torch_em.data.datasets.light_microscopy.deepseas
The DeepSeas dataset contains annotations for cell segmentation in phase-contrast microscopy images of stem cells, bronchial cells and muscle cells.
NOTE: Please download the dataset manually from https://drive.google.com/drive/folders/18odgkzafW8stHkzME_s7Es-ue7odVAc5?usp=sharing. The original data is located at: https://drive.google.com/drive/folders/13RhhBAetSWkjySyhJcDqj_FaO09hxkhO?usp=sharing. The tracking data is located at: https://drive.google.com/drive/folders/10LWey85fgHgFj_myIr1CYSOviD4SleE4?usp=sharing.
The dataset is located at https://deepseas.org/datasets/. The codebase for this dataset is located at https://github.com/abzargar/DeepSea. Please cite them if you use this dataset for your research.
"""The DeepSeas dataset contains annotations for cell segmentation in
phase-contrast microscopy images of stem cells, bronchial cells and muscle cells.

NOTE: Please download the dataset manually from https://drive.google.com/drive/folders/18odgkzafW8stHkzME_s7Es-ue7odVAc5?usp=sharing.
The original data is located at: https://drive.google.com/drive/folders/13RhhBAetSWkjySyhJcDqj_FaO09hxkhO?usp=sharing.
The tracking data is located at: https://drive.google.com/drive/folders/10LWey85fgHgFj_myIr1CYSOviD4SleE4?usp=sharing.

The dataset is located at https://deepseas.org/datasets/.
The codebase for this dataset is located at https://github.com/abzargar/DeepSea.
Please cite them if you use this dataset for your research.
"""  # noqa

import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, Literal, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://drive.google.com/drive/folders/18odgkzafW8stHkzME_s7Es-ue7odVAc5?usp=sharing"


def get_deepseas_data(path: Union[os.PathLike, str], split: Literal['train', 'test'], download: bool = False) -> str:
    """Locate the DeepSeas segmentation data for one split, extracting local zip archives if required.

    NOTE: Automatic download is not supported. The 'segmentation_dataset' has to be downloaded manually from:
    - https://drive.google.com/drive/folders/18odgkzafW8stHkzME_s7Es-ue7odVAc5?usp=sharing.

    Args:
        path: Filepath to the folder that holds the manually downloaded zip files or the extracted data.
        split: The choice of data split. Either 'train' or 'test'.
        download: Whether to download the data if it is not present. Downloading is not implemented,
            so passing `True` raises an error unless the folder for the split already exists.

    Returns:
        Filepath to the 'segmentation_dataset/<split>' folder inside of `path`.

    Raises:
        ValueError: If `split` is neither 'train' nor 'test'.
        NotImplementedError: If the folder for the split does not exist and either
            no zip files are found in `path` or `download` is set.
    """
    if split not in ("train", "test"):
        raise ValueError(f"'{split}' is not a valid split choice.")

    split_dir = os.path.join(path, "segmentation_dataset", split)

    # The data was already extracted, so there is nothing left to do.
    if os.path.exists(split_dir):
        return split_dir

    archives = glob(os.path.join(path, "*.zip"))
    if download or not archives:
        raise NotImplementedError(
            "Automatic download for DeepSeas data is not supported at the moment. "
            f"Please download the 'segmentation_dataset' from {URL} and place the zip files at {path}."
        )

    # Extract every archive found in the folder and keep the zip files in place.
    for archive in archives:
        util.unzip(zip_path=archive, dst=path, remove=False)

    return split_dir


def get_deepseas_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the DeepSeas data.

    Args:
        path: Filepath to a folder where the downloaded data will be stored.
        split: The choice of data split.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        RuntimeError: If no images are found or if the number of images and masks differs.
    """
    data_dir = get_deepseas_data(path, split, download)

    image_dir = os.path.join(data_dir, "images")
    mask_dir = os.path.join(data_dir, "masks")

    # Natural sorting is applied to both lists, so that images and masks are ordered in the same way.
    raw_paths = natsorted(glob(os.path.join(image_dir, "*.png")))
    label_paths = natsorted(glob(os.path.join(mask_dir, "*.png")))

    # Explicit checks instead of `assert`, because assertions are removed when python runs with `-O`.
    if not raw_paths:
        raise RuntimeError(f"Could not find any png images at '{image_dir}'.")

    if len(raw_paths) != len(label_paths):
        raise RuntimeError(
            f"The number of images ({len(raw_paths)}) at '{image_dir}' does not match "
            f"the number of masks ({len(label_paths)}) at '{mask_dir}'."
        )

    return raw_paths, label_paths


def get_deepseas_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Get the DeepSeas dataset for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be stored.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, mask_paths = get_deepseas_paths(path=path, split=split, download=download)

    # The images and masks are passed as lists of files, hence no internal keys are given.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        label_paths=mask_paths,
        raw_key=None,
        label_key=None,
        patch_shape=patch_shape,
        with_channels=True,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset


def get_deepseas_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Get the DeepSeas dataloader for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be stored.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the remaining ones, which are passed on to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_deepseas_dataset(path, patch_shape, split, download, **dataset_kwargs)
    return torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)
def get_deepseas_data(path: Union[os.PathLike, str], split: Literal['train', 'test'], download: bool = False) -> str:
    """Locate the DeepSeas segmentation data for one split, extracting local zip archives if required.

    NOTE: Automatic download is not supported. The 'segmentation_dataset' has to be downloaded manually from:
    - https://drive.google.com/drive/folders/18odgkzafW8stHkzME_s7Es-ue7odVAc5?usp=sharing.

    Args:
        path: Filepath to the folder that holds the manually downloaded zip files or the extracted data.
        split: The choice of data split. Either 'train' or 'test'.
        download: Whether to download the data if it is not present. Downloading is not implemented,
            so passing `True` raises an error unless the folder for the split already exists.

    Returns:
        Filepath to the 'segmentation_dataset/<split>' folder inside of `path`.

    Raises:
        ValueError: If `split` is neither 'train' nor 'test'.
        NotImplementedError: If the folder for the split does not exist and either
            no zip files are found in `path` or `download` is set.
    """
    if split not in ("train", "test"):
        raise ValueError(f"'{split}' is not a valid split choice.")

    split_dir = os.path.join(path, "segmentation_dataset", split)

    # The data was already extracted, so there is nothing left to do.
    if os.path.exists(split_dir):
        return split_dir

    archives = glob(os.path.join(path, "*.zip"))
    if download or not archives:
        raise NotImplementedError(
            "Automatic download for DeepSeas data is not supported at the moment. "
            f"Please download the 'segmentation_dataset' from {URL} and place the zip files at {path}."
        )

    # Extract every archive found in the folder and keep the zip files in place.
    for archive in archives:
        util.unzip(zip_path=archive, dst=path, remove=False)

    return split_dir
Obtain the DeepSeas dataset.
NOTE: You need to manually download the 'segmentation_dataset' from the link: https://drive.google.com/drive/folders/18odgkzafW8stHkzME_s7Es-ue7odVAc5?usp=sharing.
Arguments:
- path: Filepath to a folder where the downloaded data will be stored.
- split: The choice of data split.
- download: Whether to download the data if it is not present.
Returns:
Filepath where the data is manually downloaded for further processing.
def get_deepseas_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'test'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the DeepSeas data.

    Args:
        path: Filepath to a folder where the downloaded data will be stored.
        split: The choice of data split.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        RuntimeError: If no images are found or if the number of images and masks differs.
    """
    data_dir = get_deepseas_data(path, split, download)

    image_dir = os.path.join(data_dir, "images")
    mask_dir = os.path.join(data_dir, "masks")

    # Natural sorting is applied to both lists, so that images and masks are ordered in the same way.
    raw_paths = natsorted(glob(os.path.join(image_dir, "*.png")))
    label_paths = natsorted(glob(os.path.join(mask_dir, "*.png")))

    # Explicit checks instead of `assert`, because assertions are removed when python runs with `-O`.
    if not raw_paths:
        raise RuntimeError(f"Could not find any png images at '{image_dir}'.")

    if len(raw_paths) != len(label_paths):
        raise RuntimeError(
            f"The number of images ({len(raw_paths)}) at '{image_dir}' does not match "
            f"the number of masks ({len(label_paths)}) at '{mask_dir}'."
        )

    return raw_paths, label_paths
Get paths to the DeepSeas data.
Arguments:
- path: Filepath to a folder where the downloaded data will be stored.
- split: The choice of data split.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def get_deepseas_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the DeepSeas dataset for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be stored.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_paths, mask_paths = get_deepseas_paths(path=path, split=split, download=download)

    # The images and masks are passed as lists of files, hence no internal keys are given.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_paths,
        label_paths=mask_paths,
        raw_key=None,
        label_key=None,
        patch_shape=patch_shape,
        with_channels=True,
        is_seg_dataset=False,
        **kwargs
    )
    return dataset
Get the DeepSeas dataset for cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be stored.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def get_deepseas_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'test'],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the DeepSeas dataloader for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be stored.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The choice of data split.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the remaining ones, which are passed on to the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    ds = get_deepseas_dataset(path, patch_shape, split, download, **dataset_kwargs)
    return torch_em.get_data_loader(ds, batch_size, **dataloader_kwargs)
Get the DeepSeas dataloader for cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be stored.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- split: The choice of data split.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.