torch_em.data.datasets.light_microscopy.dsb
This dataset was used in a Kaggle Data Science Bowl. It contains light microscopy images with annotations for nucleus segmentation.
The dataset is described in the publication https://doi.org/10.1038/s41592-019-0612-7. Please cite it if you use this dataset in your research.
"""This dataset was used in a Kaggle Data Science Bowl. It contains light microscopy
images with annotations for nucleus segmentation.

The dataset is described in the publication https://doi.org/10.1038/s41592-019-0612-7.
Please cite it if you use this dataset in your research.
"""

import os
from shutil import move
from typing import List, Optional, Tuple, Union

import torch_em
from torch.utils.data import Dataset, DataLoader
from .. import util

# Download URL per data source. No URL is registered for the 'full' source yet.
DSB_URLS = {
    "full": "",  # TODO
    "reduced": "https://github.com/stardist/stardist/releases/download/0.1.0/dsb2018.zip"
}
# Checksum passed to the download helper for each data source.
CHECKSUMS = {
    "full": None,
    "reduced": "e44921950edce378063aa4457e625581ba35b4c2dbd9a07c19d48900129f386f"
}


def get_dsb_data(path: Union[os.PathLike, str], source: str, download: bool) -> str:
    """Download the DSB training and test data.

    The archive of the chosen source is fetched into `path` and unpacked, and its
    'train' and 'test' folders are moved directly into `path`. Nothing is done if
    both folders are already present.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        source: The source of the dataset. Can either be 'full' for the complete dataset,
            or 'reduced' for the dataset excluding histopathology images.
            Note that no download URL is registered for 'full' yet.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the folder that contains the 'train' and 'test' data.

    Raises:
        KeyError: If `source` is not a key of `DSB_URLS`.
        NotImplementedError: If a download is requested for a source without a download URL
            and the archive is not already present in `path`.
    """
    os.makedirs(path, exist_ok=True)
    url = DSB_URLS[source]
    checksum = CHECKSUMS[source]

    train_out_path = os.path.join(path, "train")
    test_out_path = os.path.join(path, "test")

    # Both splits are already in place, so there is nothing left to do.
    if os.path.exists(train_out_path) and os.path.exists(test_out_path):
        return path

    zip_path = os.path.join(path, "dsb.zip")
    # Fail with a clear message instead of attempting a download from an empty URL.
    # An archive that is already present at zip_path is still processed as before.
    if download and not url and not os.path.exists(zip_path):
        raise NotImplementedError(
            f"Downloading the DSB data from the source '{source}' is not supported yet, "
            "because no download URL is available for it."
        )
    util.download_source(zip_path, url, download, checksum)
    # NOTE(review): the last argument presumably removes the archive after extraction - confirm.
    util.unzip(zip_path, path, True)

    # The archive unpacks into a 'dsb2018' folder; move the splits to the top level of path.
    move(os.path.join(path, "dsb2018", "train"), train_out_path)
    move(os.path.join(path, "dsb2018", "test"), test_out_path)
    return path


def get_dsb_dataset(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    source: str = "reduced",
    **kwargs
) -> Dataset:
    """Get the DSB dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use for the dataset. Either 'train' or 'test'.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        source: The source of the dataset. Can either be 'full' for the complete dataset,
            or 'reduced' for the dataset excluding histopathology images.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert split in ("test", "train"), split
    get_dsb_data(path, source, download)

    # Images and masks of a split are stored in sibling folders.
    image_path = os.path.join(path, split, "images")
    label_path = os.path.join(path, split, "masks")

    kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
    )
    kwargs = util.update_kwargs(kwargs, "ndim", 2)
    return torch_em.default_segmentation_dataset(
        image_path, "*.tif", label_path, "*.tif", patch_shape, **kwargs
    )


def get_dsb_loader(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    source: str = "reduced",
    **kwargs
) -> DataLoader:
    """Get the DSB dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The split to use for the dataset. Either 'train' or 'test'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        download: Whether to download the data if it is not present.
        offsets: Offset values for affinity computation used as target.
        boundaries: Whether to compute boundaries as the target.
        binary: Whether to use a binary segmentation target.
        source: The source of the dataset. Can either be 'full' for the complete dataset,
            or 'reduced' for the dataset excluding histopathology images.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the arguments for the dataset from the ones for the data loader.
    ds_kwargs, loader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    dataset = get_dsb_dataset(
        path, split, patch_shape, download=download,
        offsets=offsets, boundaries=boundaries, binary=binary,
        source=source, **ds_kwargs,
    )
    loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
    return loader
DSB_URLS =
{'full': '', 'reduced': 'https://github.com/stardist/stardist/releases/download/0.1.0/dsb2018.zip'}
CHECKSUMS =
{'full': None, 'reduced': 'e44921950edce378063aa4457e625581ba35b4c2dbd9a07c19d48900129f386f'}
def
get_dsb_data(path: Union[os.PathLike, str], source: str, download: bool) -> str:
def get_dsb_data(path: Union[os.PathLike, str], source: str, download: bool) -> str:
    """Download the DSB training and test data.

    The archive of the chosen source is fetched into `path` and unpacked, and its
    'train' and 'test' folders are moved directly into `path`. Nothing is done if
    both folders are already present.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        source: The source of the dataset. Can either be 'full' for the complete dataset,
            or 'reduced' for the dataset excluding histopathology images.
            Note that no download URL is registered for 'full' yet.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the folder that contains the 'train' and 'test' data.

    Raises:
        KeyError: If `source` is not a key of `DSB_URLS`.
        NotImplementedError: If a download is requested for a source without a download URL
            and the archive is not already present in `path`.
    """
    os.makedirs(path, exist_ok=True)
    url = DSB_URLS[source]
    checksum = CHECKSUMS[source]

    train_out_path = os.path.join(path, "train")
    test_out_path = os.path.join(path, "test")

    # Both splits are already in place, so there is nothing left to do.
    if os.path.exists(train_out_path) and os.path.exists(test_out_path):
        return path

    zip_path = os.path.join(path, "dsb.zip")
    # Fail with a clear message instead of attempting a download from an empty URL.
    # An archive that is already present at zip_path is still processed as before.
    if download and not url and not os.path.exists(zip_path):
        raise NotImplementedError(
            f"Downloading the DSB data from the source '{source}' is not supported yet, "
            "because no download URL is available for it."
        )
    util.download_source(zip_path, url, download, checksum)
    # NOTE(review): the last argument presumably removes the archive after extraction - confirm.
    util.unzip(zip_path, path, True)

    # The archive unpacks into a 'dsb2018' folder; move the splits to the top level of path.
    move(os.path.join(path, "dsb2018", "train"), train_out_path)
    move(os.path.join(path, "dsb2018", "test"), test_out_path)
    return path
Download the DSB training and test data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- source: The source of the dataset. Can either be 'full' for the complete dataset, or 'reduced' for the dataset excluding histopathology images.
- download: Whether to download the data if it is not present.
Returns:
The filepath to the folder that contains the 'train' and 'test' data.
def
get_dsb_dataset( path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, source: str = 'reduced', **kwargs) -> torch.utils.data.dataset.Dataset:
def get_dsb_dataset(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    source: str = "reduced",
    **kwargs
) -> Dataset:
    """Create the DSB segmentation dataset for nuclei.

    The data is fetched via `get_dsb_data` first, then the images and masks of the
    requested split are wrapped in a `torch_em.default_segmentation_dataset`.

    Args:
        path: Filepath to the folder where the downloaded data is stored.
        split: Which split of the data to use, either 'train' or 'test'.
        patch_shape: The shape of the patches used for training.
        download: Whether the data should be downloaded if it is not present.
        offsets: Offset values for computing affinities as the target.
        boundaries: Whether boundaries should be computed as the target.
        binary: Whether a binary segmentation target should be used.
        source: The data source. Either 'full' for the complete dataset,
            or 'reduced' for the dataset excluding histopathology images.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    assert split in ("test", "train"), split
    get_dsb_data(path, source, download)

    # Images and masks of a split live in sibling folders below the split folder.
    split_root = os.path.join(path, split)
    raw_dir = os.path.join(split_root, "images")
    mask_dir = os.path.join(split_root, "masks")

    # Configure the label transform according to the requested target and enforce 2d data.
    ds_kwargs, _ = util.add_instance_label_transform(
        kwargs, add_binary_target=True, binary=binary, boundaries=boundaries, offsets=offsets
    )
    ds_kwargs = util.update_kwargs(ds_kwargs, "ndim", 2)

    file_pattern = "*.tif"
    return torch_em.default_segmentation_dataset(
        raw_dir, file_pattern, mask_dir, file_pattern, patch_shape, **ds_kwargs
    )
Get the DSB dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split to use for the dataset. Either 'train' or 'test'.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- source: The source of the dataset. Can either be 'full' for the complete dataset, or 'reduced' for the dataset excluding histopathology images.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_dsb_loader( path: Union[os.PathLike, str], split: str, patch_shape: Tuple[int, int], batch_size: int, download: bool = False, offsets: Optional[List[List[int]]] = None, boundaries: bool = False, binary: bool = False, source: str = 'reduced', **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_dsb_loader(
    path: Union[os.PathLike, str],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    download: bool = False,
    offsets: Optional[List[List[int]]] = None,
    boundaries: bool = False,
    binary: bool = False,
    source: str = "reduced",
    **kwargs
) -> DataLoader:
    """Create the DSB dataloader for nucleus segmentation.

    Args:
        path: Filepath to the folder where the downloaded data is stored.
        split: Which split of the data to use, either 'train' or 'test'.
        patch_shape: The shape of the patches used for training.
        batch_size: The batch size used for training.
        download: Whether the data should be downloaded if it is not present.
        offsets: Offset values for computing affinities as the target.
        boundaries: Whether boundaries should be computed as the target.
        binary: Whether a binary segmentation target should be used.
        source: The data source. Either 'full' for the complete dataset,
            or 'reduced' for the dataset excluding histopathology images.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`
            or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Keyword arguments are divided between the dataset and the data loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    dsb_dataset = get_dsb_dataset(
        path,
        split,
        patch_shape,
        download=download,
        offsets=offsets,
        boundaries=boundaries,
        binary=binary,
        source=source,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(dsb_dataset, batch_size, **dataloader_kwargs)
Get the DSB dataloader for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- split: The split to use for the dataset. Either 'train' or 'test'.
- patch_shape: The patch shape to use for training.
- batch_size: The batch size for training.
- download: Whether to download the data if it is not present.
- offsets: Offset values for affinity computation used as target.
- boundaries: Whether to compute boundaries as the target.
- binary: Whether to use a binary segmentation target.
- source: The source of the dataset. Can either be 'full' for the complete dataset, or 'reduced' for the dataset excluding histopathology images.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.