torch_em.data.datasets.light_microscopy.cellpose

This dataset contains annotations for cell segmentation in fluorescently-labeled microscopy images.

This dataset is from the following publications:

  1"""This dataset contains annotations for cell segmentation in fluorescently-labeled microscopy images.
  2
  3This dataset is from the following publications:
  4- https://doi.org/10.1038/s41592-020-01018-x
  5- https://doi.org/10.1038/s41592-022-01663-4
  6Please cite it if you use this dataset in your research.
  7"""
  8
  9import os
 10from glob import glob
 11from natsort import natsorted
 12from typing import Union, Tuple, Literal, Optional, List
 13
 14import torch_em
 15
 16from torch.utils.data import Dataset, DataLoader
 17
 18from .. import util
 19from .neurips_cell_seg import to_rgb
 20
 21
 22AVAILABLE_CHOICES = ["cyto", "cyto2"]
 23
 24
 25def get_cellpose_data(
 26    path: Union[os.PathLike, str],
 27    split: Literal["train", "test"],
 28    choice: Literal["cyto", "cyto2"],
 29    download: bool = False,
 30) -> str:
 31    """Instruction to download CellPose data.
 32
 33    NOTE: Please download the dataset from "https://www.cellpose.org/dataset".
 34
 35    Args:
 36        path: Filepath to a folder where the data is downloaded for further processing.
 37        split: The data split to use. Either 'train', or 'test'.
 38        choice: The choice of dataset. Either 'cyto' or 'cyto2'.
 39        download: Whether to download the data if it is not present.
 40
 41    Returns:
 42        The filepath to the folder where the data is manually downloaded.
 43    """
 44    per_choice_dir = os.path.join(path, choice)  # path where the unzipped files will be stored
 45    if choice == "cyto":
 46        assert split in ["train", "test"], f"'{split}' is not a valid split in '{choice}'."
 47        zip_path = os.path.join(path, f"{split}.zip")
 48        data_dir = os.path.join(per_choice_dir, split)  # path where the per split images for 'cyto' exist.
 49    elif choice == "cyto2":
 50        assert split == "train", f"'{split}' is not a valid split in '{choice}'."
 51        zip_path = os.path.join(path, "train_cyto2.zip")
 52        data_dir = os.path.join(per_choice_dir, "train_cyto2")  # path where 'train' split images for 'cyto2' exist.
 53    else:
 54        raise ValueError(f"'{choice}' is not a valid dataset choice.")
 55
 56    if os.path.exists(data_dir):
 57        return data_dir
 58    else:
 59        if not os.path.exists(zip_path) and download:
 60            raise NotImplementedError(
 61                "The dataset cannot be automatically downloaded. "
 62                "Please see 'get_cellpose_data' in 'torch_em/data/datasets/light_microscopy/cellpose.py' for details."
 63            )
 64        util.unzip(zip_path=zip_path, dst=per_choice_dir, remove=False)
 65
 66    return data_dir
 67
 68
 69def get_cellpose_paths(
 70    path: Union[os.PathLike, str],
 71    split: Literal['train', 'test'],
 72    choice: Optional[Literal["cyto", "cyto2"]] = None,
 73    download: bool = False,
 74) -> Tuple[List[str], List[str]]:
 75    """Get paths to the CellPose data.
 76
 77    Args:
 78        path: Filepath to a folder where the downloaded data will be saved.
 79        split: The data split to use. Either 'train', or 'test'.
 80        choice: The choice of dataset. Either 'cyto' or 'cyto2'.
 81        download: Whether to download the data if it is not present.
 82
 83    Returns:
 84        List of filepaths for the image data.
 85        List of filepaths for the label data.
 86    """
 87    data_dir = get_cellpose_data(path, split, choice, download)
 88
 89    image_paths = natsorted(glob(os.path.join(data_dir, "*_img.png")))
 90    gt_paths = natsorted(glob(os.path.join(data_dir, "*_masks.png")))
 91
 92    assert len(image_paths) == len(gt_paths) and len(image_paths) > 0
 93
 94    return image_paths, gt_paths
 95
 96
 97def get_cellpose_dataset(
 98    path: Union[os.PathLike, str],
 99    patch_shape: Tuple[int, int],
100    split: Literal["train", "test"],
101    choice: Optional[Literal["cyto", "cyto2"]] = None,
102    download: bool = False,
103    **kwargs
104) -> Dataset:
105    """Get the CellPose dataset for cell segmentation.
106
107    Args:
108        path: Filepath to a folder where the downloaded data will be saved.
109        patch_shape: The patch shape to use for training.
110        split: The data split to use. Either 'train', or 'test'.
111        choice: The choice of dataset. Either 'cyto' or 'cyto2'.
112        download: Whether to download the data if it is not present.
113        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
114
115    Returns:
116        The segmentation dataset.
117    """
118    assert split in ["train", "test"]
119
120    if choice is None:
121        choice = AVAILABLE_CHOICES
122    else:
123        if not isinstance(choice, list):
124            choice = [choice]
125
126    image_paths, gt_paths = [], []
127    for per_choice in choice:
128        assert per_choice in AVAILABLE_CHOICES
129        per_image_paths, per_gt_paths = get_cellpose_paths(path, split, per_choice, download)
130        image_paths.extend(per_image_paths)
131        gt_paths.extend(per_gt_paths)
132
133    if "raw_transform" not in kwargs:
134        kwargs["raw_transform"] = torch_em.transform.get_raw_transform(augmentation2=to_rgb)
135
136    if "transform" not in kwargs:
137        kwargs["transform"] = torch_em.transform.get_augmentations(ndim=2)
138
139    return torch_em.default_segmentation_dataset(
140        raw_paths=image_paths,
141        raw_key=None,
142        label_paths=gt_paths,
143        label_key=None,
144        is_seg_dataset=False,
145        patch_shape=patch_shape,
146        ndim=2,
147        **kwargs
148    )
149
150
def get_cellpose_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "test"],
    choice: Optional[Literal["cyto", "cyto2"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build a DataLoader over the CellPose dataset for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of dataset. Either 'cyto' or 'cyto2'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    cellpose_dataset = get_cellpose_dataset(
        path, patch_shape, split, choice, download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(cellpose_dataset, batch_size, **dataloader_kwargs)
    return loader
AVAILABLE_CHOICES = ['cyto', 'cyto2']
def get_cellpose_data( path: Union[os.PathLike, str], split: Literal['train', 'test'], choice: Literal['cyto', 'cyto2'], download: bool = False) -> str:
def get_cellpose_data(
    path: Union[os.PathLike, str],
    split: Literal["train", "test"],
    choice: Literal["cyto", "cyto2"],
    download: bool = False,
) -> str:
    """Locate the manually downloaded CellPose data, unzipping it if necessary.

    NOTE: Please download the dataset from "https://www.cellpose.org/dataset".

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of dataset. Either 'cyto' or 'cyto2'.
        download: Whether to download the data if it is not present.
            Automatic download is not supported; see 'Raises'.

    Returns:
        The filepath to the folder where the data is manually downloaded.

    Raises:
        ValueError: If `choice` is unknown or `split` is not available for `choice`.
        NotImplementedError: If the data is missing and `download` is True.
        FileNotFoundError: If the data is missing and `download` is False.
    """
    # Validate the inputs with real exceptions: asserts are stripped under 'python -O',
    # which would let an unsupported split silently resolve to another split's folder.
    if choice == "cyto":
        valid_splits = ("train", "test")
        zip_name, folder_name = f"{split}.zip", split
    elif choice == "cyto2":
        valid_splits = ("train",)
        zip_name, folder_name = "train_cyto2.zip", "train_cyto2"
    else:
        raise ValueError(f"'{choice}' is not a valid dataset choice.")

    if split not in valid_splits:
        raise ValueError(f"'{split}' is not a valid split in '{choice}'.")

    per_choice_dir = os.path.join(path, choice)  # path where the unzipped files will be stored
    zip_path = os.path.join(path, zip_name)  # the manually downloaded archive
    data_dir = os.path.join(per_choice_dir, folder_name)  # path where the images of this split exist

    # The archive was already extracted in a previous call.
    if os.path.exists(data_dir):
        return data_dir

    # Without the archive there is nothing to unzip: fail with an actionable message
    # instead of passing a non-existent path on to the unzip helper.
    if not os.path.exists(zip_path):
        if download:
            raise NotImplementedError(
                "The dataset cannot be automatically downloaded. "
                "Please see 'get_cellpose_data' in 'torch_em/data/datasets/light_microscopy/cellpose.py' for details."
            )
        raise FileNotFoundError(
            f"Could not find '{zip_path}'. "
            "Please download the dataset manually from 'https://www.cellpose.org/dataset'."
        )

    util.unzip(zip_path=zip_path, dst=per_choice_dir, remove=False)
    return data_dir

Instruction to download CellPose data.

NOTE: Please download the dataset from "https://www.cellpose.org/dataset".

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • split: The data split to use. Either 'train', or 'test'.
  • choice: The choice of dataset. Either 'cyto' or 'cyto2'.
  • download: Whether to download the data if it is not present.
Returns:

The filepath to the folder where the data is manually downloaded.

def get_cellpose_paths( path: Union[os.PathLike, str], split: Literal['train', 'test'], choice: Optional[Literal['cyto', 'cyto2']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_cellpose_paths(
    path: Union[os.PathLike, str],
    split: Literal['train', 'test'],
    choice: Optional[Literal["cyto", "cyto2"]] = None,
    download: bool = False,
) -> Tuple[List[str], List[str]]:
    """Collect the image and label filepaths of the CellPose data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of dataset. Either 'cyto' or 'cyto2'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.
    """
    root = get_cellpose_data(path, split, choice, download)

    def _collect(pattern):
        # Natural sort of all files in the data folder that match the pattern.
        return natsorted(glob(os.path.join(root, pattern)))

    image_paths = _collect("*_img.png")
    gt_paths = _collect("*_masks.png")

    # Every image needs a matching mask, and the folder must not be empty.
    n_images = len(image_paths)
    assert n_images == len(gt_paths) and n_images > 0

    return image_paths, gt_paths

Get paths to the CellPose data.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • split: The data split to use. Either 'train', or 'test'.
  • choice: The choice of dataset. Either 'cyto' or 'cyto2'.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_cellpose_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'test'], choice: Optional[Literal['cyto', 'cyto2']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 98def get_cellpose_dataset(
 99    path: Union[os.PathLike, str],
100    patch_shape: Tuple[int, int],
101    split: Literal["train", "test"],
102    choice: Optional[Literal["cyto", "cyto2"]] = None,
103    download: bool = False,
104    **kwargs
105) -> Dataset:
106    """Get the CellPose dataset for cell segmentation.
107
108    Args:
109        path: Filepath to a folder where the downloaded data will be saved.
110        patch_shape: The patch shape to use for training.
111        split: The data split to use. Either 'train', or 'test'.
112        choice: The choice of dataset. Either 'cyto' or 'cyto2'.
113        download: Whether to download the data if it is not present.
114        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
115
116    Returns:
117        The segmentation dataset.
118    """
119    assert split in ["train", "test"]
120
121    if choice is None:
122        choice = AVAILABLE_CHOICES
123    else:
124        if not isinstance(choice, list):
125            choice = [choice]
126
127    image_paths, gt_paths = [], []
128    for per_choice in choice:
129        assert per_choice in AVAILABLE_CHOICES
130        per_image_paths, per_gt_paths = get_cellpose_paths(path, split, per_choice, download)
131        image_paths.extend(per_image_paths)
132        gt_paths.extend(per_gt_paths)
133
134    if "raw_transform" not in kwargs:
135        kwargs["raw_transform"] = torch_em.transform.get_raw_transform(augmentation2=to_rgb)
136
137    if "transform" not in kwargs:
138        kwargs["transform"] = torch_em.transform.get_augmentations(ndim=2)
139
140    return torch_em.default_segmentation_dataset(
141        raw_paths=image_paths,
142        raw_key=None,
143        label_paths=gt_paths,
144        label_key=None,
145        is_seg_dataset=False,
146        patch_shape=patch_shape,
147        ndim=2,
148        **kwargs
149    )

Get the CellPose dataset for cell segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train', or 'test'.
  • choice: The choice of dataset. Either 'cyto' or 'cyto2'.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_cellpose_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'test'], choice: Optional[Literal['cyto', 'cyto2']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_cellpose_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal["train", "test"],
    choice: Optional[Literal["cyto", "cyto2"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build a DataLoader over the CellPose dataset for cell segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train', or 'test'.
        choice: The choice of dataset. Either 'cyto' or 'cyto2'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from those meant for the loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    cellpose_dataset = get_cellpose_dataset(
        path, patch_shape, split, choice, download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(cellpose_dataset, batch_size, **dataloader_kwargs)
    return loader

Get the CellPose dataloader for cell segmentation.

Arguments:
  • path: Filepath to a folder where the downloaded data will be saved.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train', or 'test'.
  • choice: The choice of dataset. Either 'cyto' or 'cyto2'.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.