torch_em.data.datasets.light_microscopy.deepseas

The DeepSeas dataset contains annotations for cell segmentation in phase-contrast microscopy images of stem cells, bronchial cells and muscle cells.

NOTE: Please download the dataset manually from https://drive.google.com/drive/folders/18odgkzafW8stHkzME_s7Es-ue7odVAc5?usp=sharing. The original data is located at: https://drive.google.com/drive/folders/13RhhBAetSWkjySyhJcDqj_FaO09hxkhO?usp=sharing. The tracking data is located at: https://drive.google.com/drive/folders/10LWey85fgHgFj_myIr1CYSOviD4SleE4?usp=sharing.

The dataset is hosted at https://deepseas.org/datasets/ and the codebase is available at https://github.com/abzargar/DeepSea. Please cite them if you use this dataset for your research.
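Quick start: a minimal sketch, assuming the 'segmentation_dataset' zip files have already been downloaded manually and placed in a local folder (the path, batch size and patch shape below are illustrative assumptions, not fixed values):

from torch_em.data.datasets.light_microscopy.deepseas import get_deepseas_loader

# The zip files are assumed to sit directly in "./data/deepseas" (illustrative path);
# the loader unpacks them on first use and yields image/label batches.
loader = get_deepseas_loader(
    path="./data/deepseas",
    batch_size=4,            # illustrative batch size
    patch_shape=(512, 512),  # illustrative patch shape
    split="train",
)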

  1"""The DeepSeas dataset contains annotations for cell segmentation in
  2phase-contrast microscopy images of stem cells, bronchial cells and muscle cells.
  3
  4NOTE: Please download the dataset manually from https://drive.google.com/drive/folders/18odgkzafW8stHkzME_s7Es-ue7odVAc5?usp=sharing.
  5The original data is located at: https://drive.google.com/drive/folders/13RhhBAetSWkjySyhJcDqj_FaO09hxkhO?usp=sharing.
  6The tracking data is located at: https://drive.google.com/drive/folders/10LWey85fgHgFj_myIr1CYSOviD4SleE4?usp=sharing.
  7
  8The dataset is located at https://deepseas.org/datasets/.
  9The codebase for this dataset is located at https://github.com/abzargar/DeepSea.
 10Please cite them if you use this dataset for your research.
 11"""  # noqa
 12
 13import os
 14from glob import glob
 15from natsort import natsorted
 16from typing import Union, Tuple, Literal, List
 17
 18from torch.utils.data import Dataset, DataLoader
 19
 20import torch_em
 21
 22from .. import util
 23
 24
 25URL = "https://drive.google.com/drive/folders/18odgkzafW8stHkzME_s7Es-ue7odVAc5?usp=sharing"
 26
 27
 28def get_deepseas_data(path: Union[os.PathLike, str], split: Literal['train', 'test'], download: bool = False) -> str:
 29    """Obtain the DeepSeas dataset.
 30
 31    NOTE: You need to manually download the 'segmentation_dataset' from the link:
 32    - https://drive.google.com/drive/folders/18odgkzafW8stHkzME_s7Es-ue7odVAc5?usp=sharing.
 33
 34    Args:
 35        path: Filepath to a folder where the downloaded data will be stored.
 36        split: The choice of data split.
 37        download: Whether to download the data if it is not present.
 38
 39    Returns:
 40        Filepath where the data is manually downloaded for further processing.
 41    """
 42    if split not in ["train", "test"]:
 43        raise ValueError(f"'{split}' is not a valid split choice.")
 44
 45    data_dir = os.path.join(path, "segmentation_dataset", split)
 46    if os.path.exists(data_dir):
 47        return data_dir
 48
 49    zip_paths = glob(os.path.join(path, "*.zip"))
 50    if len(zip_paths) == 0 or download:
 51        raise NotImplementedError(
 52            "Automatic download for DeepSeas data is not supported at the moment. "
 53            f"Please download the 'segmentation_dataset' from {URL} and place the zip files at {path}."
 54        )
 55
 56    for zip_path in zip_paths:
 57        util.unzip(zip_path=zip_path, dst=path, remove=False)
 58
 59    return data_dir
 60
 61
 62def get_deepseas_paths(
 63    path: Union[os.PathLike, str], split: Literal['train', 'test'], download: bool = False
 64) -> Tuple[List[str], List[str]]:
 65    """Get paths to the DeepSeas data.
 66
 67    Args:
 68        path: Filepath to a folder where the downloaded data will be stored.
 69        split: The choice of data split.
 70        download: Whether to download the data if it is not present.
 71
 72    Returns:
 73        List of filepaths for the image data.
 74        List of filepaths for the label data.
 75    """
 76    data_dir = get_deepseas_data(path, split, download)
 77
 78    raw_paths = natsorted(glob(os.path.join(data_dir, "images", "*.png")))
 79    label_paths = natsorted(glob(os.path.join(data_dir, "masks", "*.png")))
 80
 81    assert len(raw_paths) == len(label_paths) and len(raw_paths) > 0
 82
 83    return raw_paths, label_paths
 84
 85
 86def get_deepseas_dataset(
 87    path: Union[os.PathLike, str],
 88    patch_shape: Tuple[int, int],
 89    split: Literal['train', 'test'],
 90    download: bool = False,
 91    **kwargs
 92) -> Dataset:
 93    """Get the DeepSeas dataset for cell segmentation.
 94
 95    Args:
 96        path: Filepath to a folder where the downloaded data will be stored.
 97        patch_shape: The patch shape to use for training.
 98        split: The choice of data split.
 99        download: Whether to download the data if it is not present.
100        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
101
102    Returns:
103        The segmentation dataset.
104    """
105    raw_paths, label_paths = get_deepseas_paths(path, split, download)
106
107    return torch_em.default_segmentation_dataset(
108        raw_paths=raw_paths,
109        raw_key=None,
110        label_paths=label_paths,
111        label_key=None,
112        patch_shape=patch_shape,
113        is_seg_dataset=False,
114        with_channels=True,
115        **kwargs
116    )
117
118
119def get_deepseas_loader(
120    path: Union[os.PathLike, str],
121    batch_size: int,
122    patch_shape: Tuple[int, int],
123    split: Literal['train', 'test'],
124    download: bool = False,
125    **kwargs
126) -> DataLoader:
127    """Get the DeepSeas dataloader for cell segmentation.
128
129    Args:
130        path: Filepath to a folder where the downloaded data will be stored.
131        batch_size: The batch size for training.
132        patch_shape: The patch shape to use for training.
133        split: The choice of data split.
134        download: Whether to download the data if it is not present.
135        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
136
137    Returns:
138        The DataLoader.
139    """
140    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
141    dataset = get_deepseas_dataset(path, patch_shape, split, download, **ds_kwargs)
142    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL = 'https://drive.google.com/drive/folders/18odgkzafW8stHkzME_s7Es-ue7odVAc5?usp=sharing'
def get_deepseas_data(path: Union[os.PathLike, str], split: Literal['train', 'test'], download: bool = False) -> str:

Obtain the DeepSeas dataset.

NOTE: You need to manually download the 'segmentation_dataset' from the link:
  • https://drive.google.com/drive/folders/18odgkzafW8stHkzME_s7Es-ue7odVAc5?usp=sharing

Arguments:
  • path: Filepath to a folder where the downloaded data will be stored.
  • split: The choice of data split.
  • download: Whether to download the data if it is not present.
Returns:
  • Filepath where the data is manually downloaded for further processing.
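
A minimal sketch of calling this function, assuming the zip files were already placed under an illustrative local folder:

from torch_em.data.datasets.light_microscopy.deepseas import get_deepseas_data

# Unpacks any zip files found at the given (illustrative) path and returns the
# split folder, e.g. "./data/deepseas/segmentation_dataset/train", which holds
# the "images" and "masks" subfolders. If the data has not been unpacked yet and
# no zip files are found (or download=True is passed), a NotImplementedError is
# raised instead.
data_dir = get_deepseas_data(path="./data/deepseas", split="train", download=False)
print(data_dir)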

def get_deepseas_paths(path: Union[os.PathLike, str], split: Literal['train', 'test'], download: bool = False) -> Tuple[List[str], List[str]]:

Get paths to the DeepSeas data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be stored.
  • split: The choice of data split.
  • download: Whether to download the data if it is not present.
Returns:
  • List of filepaths for the image data.
  • List of filepaths for the label data.
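
A short sketch of how the two returned lists line up (the path is an illustrative assumption):

from torch_em.data.datasets.light_microscopy.deepseas import get_deepseas_paths

raw_paths, label_paths = get_deepseas_paths(path="./data/deepseas", split="train")
# Both lists are naturally sorted PNG filepaths, so raw_paths[i] and
# label_paths[i] refer to the same image/mask pair.
print(len(raw_paths), raw_paths[0], label_paths[0])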

def get_deepseas_dataset(path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'test'], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:

Get the DeepSeas dataset for cell segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be stored.
  • patch_shape: The patch shape to use for training.
  • split: The choice of data split.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:
  • The segmentation dataset.
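
A hedged sketch of constructing the dataset (the path and patch shape are illustrative assumptions):

from torch_em.data.datasets.light_microscopy.deepseas import get_deepseas_dataset

dataset = get_deepseas_dataset(
    path="./data/deepseas",   # illustrative folder holding the zip files
    patch_shape=(512, 512),   # illustrative patch shape
    split="test",
)
# Each element is an image/label patch pair as produced by
# torch_em.default_segmentation_dataset.
print(len(dataset))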

def get_deepseas_loader(path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'test'], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:

Get the DeepSeas dataloader for cell segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be stored.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The choice of data split.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:
  • The DataLoader.
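
An end-to-end sketch (the path, patch shape, batch size and worker count are illustrative assumptions); keyword arguments that `torch_em.default_segmentation_dataset` does not accept, such as num_workers or shuffle, are forwarded to the PyTorch DataLoader:

from torch_em.data.datasets.light_microscopy.deepseas import get_deepseas_loader

loader = get_deepseas_loader(
    path="./data/deepseas",   # illustrative folder holding the zip files
    batch_size=2,             # illustrative batch size
    patch_shape=(512, 512),   # illustrative patch shape
    split="train",
    num_workers=2,            # forwarded to the PyTorch DataLoader
    shuffle=True,             # forwarded to the PyTorch DataLoader
)

for images, labels in loader:
    print(images.shape, labels.shape)  # one batch of image and label patches
    break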