torch_em.data.datasets.light_microscopy.ctc
The Cell Tracking Challenge contains annotated data for cell segmentation and tracking.
We currently provide the 2d datasets with segmentation annotations. If you use this data in your research please cite https://doi.org/10.1038/nmeth.4473.
"""The Cell Tracking Challenge contains annotated data for cell segmentation and tracking.

We currently provide the 2d datasets with segmentation annotations.
If you use this data in your research please cite https://doi.org/10.1038/nmeth.4473.
"""

import os
from glob import glob
from shutil import copyfile
from typing import Optional, Tuple, Union

import torch_em
from torch.utils.data import Dataset, DataLoader
from .. import util


# SHA256 checksums for the downloadable zip archives, keyed by split and dataset name.
# The keys of the "train" dict also define the set of valid dataset names.
CTC_CHECKSUMS = {
    "train": {
        "BF-C2DL-HSC": "0aa68ec37a9b06e72a5dfa07d809f56e1775157fb674bb75ff904936149657b1",
        "BF-C2DL-MuSC": "ca72b59042809120578a198ba236e5ed3504dd6a122ef969428b7c64f0a5e67d",
        "DIC-C2DH-HeLa": "832fed2d05bb7488cf9c51a2994b75f8f3f53b3c3098856211f2d39023c34e1a",
        "Fluo-C2DL-Huh7": "1912658c1b3d8b38b314eb658b559e7b39c256917150e9b3dd8bfdc77347617d",
        "Fluo-C2DL-MSC": "a083521f0cb673ae02d4957c5e6580c2e021943ef88101f6a2f61b944d671af2",
        "Fluo-N2DH-GOWT1": "1a7bd9a7d1d10c4122c7782427b437246fb69cc3322a975485c04e206f64fc2c",
        "Fluo-N2DH-SIM+": "3e809148c87ace80c72f563b56c35e0d9448dcdeb461a09c83f61e93f5e40ec8",
        "Fluo-N2DL-HeLa": "35dd99d58e071aba0b03880128d920bd1c063783cc280f9531fbdc5be614c82e",
        "PhC-C2DH-U373": "b18185c18fce54e8eeb93e4bbb9b201d757add9409bbf2283b8114185a11bc9e",
        "PhC-C2DL-PSC": "9d54bb8febc8798934a21bf92e05d92f5e8557c87e28834b2832591cdda78422",
    },
    "test": {
        "BF-C2DL-HSC": "fd1c05ec625fd0526c8369d1139babe137e885457eee98c10d957da578d0d5bc",
        "BF-C2DL-MuSC": "c5cae259e6090e82a2596967fb54c8a768717c1772398f8546ad1c8df0820450",
        "DIC-C2DH-HeLa": "5e5d5f2aa90aef99d750cf03f5c12d799d50b892f98c86950e07a2c5955ac01f",
        "Fluo-C2DL-Huh7": "cc7359f8fb6b0c43995365e83ce0116d32f477ac644b2ca02b98bc253e2bcbbe",
        "Fluo-C2DL-MSC": "c90b13e603dde52f17801d4f0cadde04ed7f21cc05296b1f0957d92dbfc8ffa6",
        "Fluo-N2DH-GOWT1": "c6893ec2d63459de49d4dc21009b04275573403c62cc02e6ee8d0cb1a5068add",
        "Fluo-N2DH-SIM+": "c4f257add739b284d02176057814de345dee2ac1a7438e360ccd2df73618db68",
        "Fluo-N2DL-HeLa": "45cf3daf05e8495aa2ce0febacca4cf0928fab808c0b14ed2eb7289a819e6bb8",
        "PhC-C2DH-U373": "7aa3162e4363a416b259149adc13c9b09cb8aecfe8165eb1428dd534b66bec8a",
        "PhC-C2DL-PSC": "8c98ac6203e7490157ceb6aa1131d60a3863001b61fb75e784bc49d47ee264d5",
    }
}


def _get_ctc_url_and_checksum(dataset_name, split):
    """Return the download URL and expected SHA256 checksum for a dataset and split."""
    if split == "train":
        _link_to_split = "training-datasets"
    else:
        _link_to_split = "test-datasets"

    url = f"http://data.celltrackingchallenge.net/{_link_to_split}/{dataset_name}.zip"
    checksum = CTC_CHECKSUMS[split][dataset_name]
    return url, checksum


def get_ctc_data(
    path: Union[os.PathLike, str],
    dataset_name: str,
    download: bool,
    split: str
) -> str:
    """Download data for the given dataset and split from the cell tracking challenge.

    Note: this was previously an f-string docstring; f-strings are not assigned
    to ``__doc__``, so it has been converted to a plain docstring.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        dataset_name: Name of the dataset to be downloaded. The available datasets
            are the keys of ``CTC_CHECKSUMS["train"]``.
        download: Whether to download the data if it is not present.
        split: The split to download. Either 'train' or 'test'.

    Returns:
        The filepath to the downloaded data.
    """
    dataset_names = list(CTC_CHECKSUMS["train"].keys())
    if dataset_name not in dataset_names:
        raise ValueError(f"Invalid dataset: {dataset_name}, choose one of {dataset_names}.")

    data_path = os.path.join(path, split, dataset_name)

    # The data is assumed to be complete if the target folder exists.
    if os.path.exists(data_path):
        return data_path

    os.makedirs(data_path)
    url, checksum = _get_ctc_url_and_checksum(dataset_name, split)
    zip_path = os.path.join(path, f"{dataset_name}.zip")
    util.download_source(zip_path, url, download, checksum=checksum)
    # Unzip into the split folder; the archive contains the dataset folder itself.
    util.unzip(zip_path, os.path.join(path, split), remove=True)

    return data_path


def _require_gt_images(data_path, vol_ids):
    """Collect the per-volume image and label folders, copying over the raw images
    that correspond to annotated frames (only a subset of frames has labels)."""
    image_paths, label_paths = [], []

    if isinstance(vol_ids, str):
        vol_ids = [vol_ids]

    for vol_id in vol_ids:
        image_folder = os.path.join(data_path, vol_id)
        # Bugfix: this previously asserted `os.path.join(image_folder)`, which returns
        # a (truthy) string and thus never failed. Check existence instead.
        assert os.path.exists(image_folder), f"Cannot find volume id, {vol_id} in {data_path}."

        label_folder = os.path.join(data_path, f"{vol_id}_GT", "SEG")

        # copy over the images corresponding to the labeled frames
        label_image_folder = os.path.join(data_path, f"{vol_id}_GT", "IM")
        os.makedirs(label_image_folder, exist_ok=True)

        this_label_paths = glob(os.path.join(label_folder, "*.tif"))
        for label_path in this_label_paths:
            fname = os.path.basename(label_path)
            image_label_path = os.path.join(label_image_folder, fname)
            if not os.path.exists(image_label_path):
                # Label files are named 'man_seg<ID>.tif'; the matching raw image is
                # 't<ID>.tif'. Bugfix: use slicing instead of `lstrip`, which strips a
                # character *set* (and only worked here by accident).
                im_name = "t" + fname[len("man_seg"):]
                image_path = os.path.join(image_folder, im_name)
                # Bugfix: was `os.path.join(image_path)`, which is always truthy.
                assert os.path.exists(image_path), image_path
                copyfile(image_path, image_label_path)

        image_paths.append(label_image_folder)
        label_paths.append(label_folder)

    return image_paths, label_paths


def get_ctc_segmentation_dataset(
    path: Union[os.PathLike, str],
    dataset_name: str,
    patch_shape: Tuple[int, int, int],
    split: str = "train",
    vol_id: Optional[int] = None,
    download: bool = False,
    **kwargs,
) -> Dataset:
    """Get the CTC dataset for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        dataset_name: Name of the dataset to be downloaded. The available datasets
            are the keys of ``CTC_CHECKSUMS["train"]``.
        patch_shape: The patch shape to use for training.
        split: The split to download. Currently only supports 'train'.
        vol_id: The volume id(s) to load. By default all volumes with annotations are loaded.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.

    Raises:
        ValueError: If an unsupported split is passed.
    """
    # Raise instead of assert: asserts are stripped under `python -O` and this is
    # user-input validation (only the train split has segmentation annotations).
    if split not in ["train"]:
        raise ValueError(f"Invalid split: {split}, only 'train' is supported.")

    data_path = get_ctc_data(path, dataset_name, download, split)

    if vol_id is None:
        vol_ids = glob(os.path.join(data_path, "*_GT"))
        vol_ids = [os.path.basename(vol_id) for vol_id in vol_ids]
        # Bugfix: use slicing instead of `rstrip("_GT")`, which strips the character
        # set {_, G, T} from the right rather than the literal suffix.
        vol_ids = [vol_id[:-len("_GT")] for vol_id in vol_ids]
    else:
        vol_ids = vol_id

    image_path, label_path = _require_gt_images(data_path, vol_ids)

    kwargs = util.update_kwargs(kwargs, "ndim", 2)
    return torch_em.default_segmentation_dataset(
        image_path, "*.tif", label_path, "*.tif", patch_shape, is_seg_dataset=True, **kwargs
    )


def get_ctc_segmentation_loader(
    path: Union[os.PathLike, str],
    dataset_name: str,
    patch_shape: Tuple[int, int, int],
    batch_size: int,
    split: str = "train",
    vol_id: Optional[int] = None,
    download: bool = False,
    **kwargs,
) -> DataLoader:
    """Get the CTC dataloader for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        dataset_name: Name of the dataset to be downloaded. The available datasets
            are the keys of ``CTC_CHECKSUMS["train"]``.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        split: The split to download. Currently only supports 'train'.
        vol_id: The volume id(s) to load. By default all volumes with annotations are loaded.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Split the kwargs between the dataset constructor and the DataLoader.
    ds_kwargs, loader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    dataset = get_ctc_segmentation_dataset(
        path, dataset_name, patch_shape, split=split, vol_id=vol_id, download=download, **ds_kwargs,
    )

    loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
    return loader
CTC_CHECKSUMS =
{'train': {'BF-C2DL-HSC': '0aa68ec37a9b06e72a5dfa07d809f56e1775157fb674bb75ff904936149657b1', 'BF-C2DL-MuSC': 'ca72b59042809120578a198ba236e5ed3504dd6a122ef969428b7c64f0a5e67d', 'DIC-C2DH-HeLa': '832fed2d05bb7488cf9c51a2994b75f8f3f53b3c3098856211f2d39023c34e1a', 'Fluo-C2DL-Huh7': '1912658c1b3d8b38b314eb658b559e7b39c256917150e9b3dd8bfdc77347617d', 'Fluo-C2DL-MSC': 'a083521f0cb673ae02d4957c5e6580c2e021943ef88101f6a2f61b944d671af2', 'Fluo-N2DH-GOWT1': '1a7bd9a7d1d10c4122c7782427b437246fb69cc3322a975485c04e206f64fc2c', 'Fluo-N2DH-SIM+': '3e809148c87ace80c72f563b56c35e0d9448dcdeb461a09c83f61e93f5e40ec8', 'Fluo-N2DL-HeLa': '35dd99d58e071aba0b03880128d920bd1c063783cc280f9531fbdc5be614c82e', 'PhC-C2DH-U373': 'b18185c18fce54e8eeb93e4bbb9b201d757add9409bbf2283b8114185a11bc9e', 'PhC-C2DL-PSC': '9d54bb8febc8798934a21bf92e05d92f5e8557c87e28834b2832591cdda78422'}, 'test': {'BF-C2DL-HSC': 'fd1c05ec625fd0526c8369d1139babe137e885457eee98c10d957da578d0d5bc', 'BF-C2DL-MuSC': 'c5cae259e6090e82a2596967fb54c8a768717c1772398f8546ad1c8df0820450', 'DIC-C2DH-HeLa': '5e5d5f2aa90aef99d750cf03f5c12d799d50b892f98c86950e07a2c5955ac01f', 'Fluo-C2DL-Huh7': 'cc7359f8fb6b0c43995365e83ce0116d32f477ac644b2ca02b98bc253e2bcbbe', 'Fluo-C2DL-MSC': 'c90b13e603dde52f17801d4f0cadde04ed7f21cc05296b1f0957d92dbfc8ffa6', 'Fluo-N2DH-GOWT1': 'c6893ec2d63459de49d4dc21009b04275573403c62cc02e6ee8d0cb1a5068add', 'Fluo-N2DH-SIM+': 'c4f257add739b284d02176057814de345dee2ac1a7438e360ccd2df73618db68', 'Fluo-N2DL-HeLa': '45cf3daf05e8495aa2ce0febacca4cf0928fab808c0b14ed2eb7289a819e6bb8', 'PhC-C2DH-U373': '7aa3162e4363a416b259149adc13c9b09cb8aecfe8165eb1428dd534b66bec8a', 'PhC-C2DL-PSC': '8c98ac6203e7490157ceb6aa1131d60a3863001b61fb75e784bc49d47ee264d5'}}
def
get_ctc_data( path: Union[os.PathLike, str], dataset_name: str, download: bool, split: str) -> str:
57def get_ctc_data( 58 path: Union[os.PathLike, str], 59 dataset_name: str, 60 download: bool, 61 split: str 62) -> str: 63 f"""Download training data from the cell tracking challenge. 64 65 Args: 66 path: Filepath to a folder where the downloaded data will be saved. 67 dataset_name: Name of the dataset to be downloaded. The available datasets are: 68 {', '.join(CTC_CHECKSUMS['train'].keys())} 69 download: Whether to download the data if it is not present. 70 split: The split to download. Either 'train' or 'test'. 71 72 Returns: 73 The filepath to the training data. 74 """ 75 dataset_names = list(CTC_CHECKSUMS["train"].keys()) 76 if dataset_name not in dataset_names: 77 raise ValueError(f"Invalid dataset: {dataset_name}, choose one of {dataset_names}.") 78 79 data_path = os.path.join(path, split, dataset_name) 80 81 if os.path.exists(data_path): 82 return data_path 83 84 os.makedirs(data_path) 85 url, checksum = _get_ctc_url_and_checksum(dataset_name, split) 86 zip_path = os.path.join(path, f"{dataset_name}.zip") 87 util.download_source(zip_path, url, download, checksum=checksum) 88 util.unzip(zip_path, os.path.join(path, split), remove=True) 89 90 return data_path
def
get_ctc_segmentation_dataset( path: Union[os.PathLike, str], dataset_name: str, patch_shape: Tuple[int, int, int], split: str = 'train', vol_id: Optional[int] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
125def get_ctc_segmentation_dataset( 126 path: Union[os.PathLike, str], 127 dataset_name: str, 128 patch_shape: Tuple[int, int, int], 129 split: str = "train", 130 vol_id: Optional[int] = None, 131 download: bool = False, 132 **kwargs, 133) -> Dataset: 134 """Get the CTC dataset for cell segmentation. 135 136 Args: 137 path: Filepath to a folder where the downloaded data will be saved. 138 dataset_name: Name of the dataset to be downloaded. The available datasets are: 139 {', '.join(CTC_CHECKSUMS['train'].keys())} 140 patch_shape: The patch shape to use for training. 141 split: The split to download. Currently only supports 'train'. 142 vol_id: The train id to load. 143 download: Whether to download the data if it is not present. 144 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`. 145 146 Returns: 147 The segmentation dataset. 148 """ 149 assert split in ["train"] 150 151 data_path = get_ctc_data(path, dataset_name, download, split) 152 153 if vol_id is None: 154 vol_ids = glob(os.path.join(data_path, "*_GT")) 155 vol_ids = [os.path.basename(vol_id) for vol_id in vol_ids] 156 vol_ids = [vol_id.rstrip("_GT") for vol_id in vol_ids] 157 else: 158 vol_ids = vol_id 159 160 image_path, label_path = _require_gt_images(data_path, vol_ids) 161 162 kwargs = util.update_kwargs(kwargs, "ndim", 2) 163 return torch_em.default_segmentation_dataset( 164 image_path, "*.tif", label_path, "*.tif", patch_shape, is_seg_dataset=True, **kwargs 165 )
Get the CTC dataset for cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- dataset_name: Name of the dataset to be downloaded. The available datasets are: {', '.join(CTC_CHECKSUMS['train'].keys())}
- patch_shape: The patch shape to use for training.
- split: The split to download. Currently only supports 'train'.
- vol_id: The train id to load.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
.
Returns:
The segmentation dataset.
def
get_ctc_segmentation_loader( path: Union[os.PathLike, str], dataset_name: str, patch_shape: Tuple[int, int, int], batch_size: int, split: str = 'train', vol_id: Optional[int] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
168def get_ctc_segmentation_loader( 169 path: Union[os.PathLike, str], 170 dataset_name: str, 171 patch_shape: Tuple[int, int, int], 172 batch_size: int, 173 split: str = "train", 174 vol_id: Optional[int] = None, 175 download: bool = False, 176 **kwargs, 177) -> DataLoader: 178 """Get the CTC dataloader for cell segmentation. 179 180 Args: 181 path: Filepath to a folder where the downloaded data will be saved. 182 dataset_name: Name of the dataset to be downloaded. The available datasets are: 183 {', '.join(CTC_CHECKSUMS['train'].keys())} 184 patch_shape: The patch shape to use for training. 185 batch_size: The batch size for training. 186 split: The split to download. Currently only supports 'train'. 187 vol_id: The train id to load. 188 download: Whether to download the data if it is not present. 189 kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader. 190 191 Returns: 192 The DataLoader. 193 """ 194 ds_kwargs, loader_kwargs = util.split_kwargs( 195 torch_em.default_segmentation_dataset, **kwargs 196 ) 197 dataset = get_ctc_segmentation_dataset( 198 path, dataset_name, patch_shape, split=split, vol_id=vol_id, download=download, **ds_kwargs, 199 ) 200 201 loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs) 202 return loader
Get the CTC dataloader for cell segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- dataset_name: Name of the dataset to be downloaded. The available datasets are: {', '.join(CTC_CHECKSUMS['train'].keys())}
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- split: The split to download. Currently only supports 'train'.
- vol_id: The train id to load.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for
torch_em.default_segmentation_dataset
or for the PyTorch DataLoader.
Returns:
The DataLoader.