torch_em.data.datasets.light_microscopy.neurips_cell_seg

This dataset comes from the NeurIPS Cell Segmentation Challenge, which collects microscopy images and annotations for cell segmentation.

The dataset contains both images with annotations for cell segmentation and unlabeled images for self-supervised or semi-supervised learning. See also the challenge website for details: https://neurips22-cellseg.grand-challenge.org/. The dataset is described in the publication https://doi.org/10.1038/s41592-024-02233-6. Please cite it if you use the dataset in your research.

  1"""This dataset comes from the Neurips Cell Segmentation Challenge,
  2which collects microscopy images and annotations for cell segmentation.
  3
  4The dataset contains both images with annotations for cell segmentation
  5and unlabeled images for self-supervised or semi-supervised learning.
  6See also the challenge website for details: https://neurips22-cellseg.grand-challenge.org/.
  7The dataset is described in the publication https://doi.org/10.1038/s41592-024-02233-6.
  8Please cite it if you use the dataset in your research.
  9"""
 10
 11import os
 12from glob import glob
 13from typing import Union, Tuple, Any, Optional, List
 14
 15import numpy as np
 16
 17import torch
 18from torch.utils.data import Dataset, DataLoader
 19
 20import torch_em
 21
 22from .. import util
 23from ... import ImageCollectionDataset, RawImageCollectionDataset, ConcatDataset
 24
 25
 26URL = {
 27    "train": "https://zenodo.org/records/10719375/files/Training-labeled.zip",
 28    "val": "https://zenodo.org/records/10719375/files/Tuning.zip",
 29    "test": "https://zenodo.org/records/10719375/files/Testing.zip",
 30    "unlabeled": "https://zenodo.org/records/10719375/files/train-unlabeled-part1.zip",
 31    "unlabeled_wsi": "https://zenodo.org/records/10719375/files/train-unlabeled-part2.zip"
 32}
 33
 34CHECKSUM = {
 35    "train": "b2383929eb8e99b2716fa0d4e2f6e03983e626a57cf00fe85175869c54aa3592",
 36    "val": "849423d36bb8fcc2d91a5b189a3b6d93c3d4071c9701eaaa44ba393a510459c4",
 37    "test": "3379730221f43830d30fddf131750e967c9c9bdf04f98811e852a050eb659ccc",
 38    "unlabeled": "390b38b398b05e9e5306a024a3bd48ab22e49592cfab3c1a119eab3636b38e0d",
 39    "unlabeled_wsi": "d1e68eba2918305eab8b846e7578ac14683de970e3fa6a7c2a4a55753be56204"
 40}
 41
 42
 43DIR_NAMES = {
 44    "train": "Training-labeled", "val": "Tuning", "test": "Testing/Public",
 45    "unlabeled": "release-part1", "unlabeled_wsi": "train-unlabeled-part2"
 46}
 47
 48ZIP_PATH = {
 49    "train": "Training-labeled.zip", "val": "Tuning.zip", "test": "Testing.zip",
 50    "unlabeled": "train-unlabeled-part1.zip", "unlabeled_wsi": "train-unlabeled-part2.zip"
 51}
 52
 53
 54def to_rgb(image):
 55    if image.ndim == 2:
 56        image = np.concatenate([image[None]] * 3, axis=0)
 57
 58    if image.ndim == 3 and image.shape[-1] == 3:
 59        image = image.transpose(2, 0, 1)
 60
 61    assert image.ndim == 3
 62    assert image.shape[0] == 3, f"{image.shape}"
 63    return image
 64
 65
 66def get_neurips_cellseg_data(root: Union[os.PathLike, str], split: str, download: bool) -> str:
 67    f"""Download the Neurips Cell Seg training data.
 68
 69    Args:
 70        root: Filepath to a folder where the downloaded data will be saved.
 71        split: The data split to download. Available splits are:
 72            {', '.join(URL.keys())}
 73        download: Whether to download the data if it is not present.
 74
 75    Returns:
 76        The filepath to the training data.
 77    """
 78    os.makedirs(root, exist_ok=True)
 79
 80    target_dir = os.path.join(root, DIR_NAMES[split])
 81    zip_path = os.path.join(root, ZIP_PATH[split])
 82
 83    if not os.path.exists(target_dir):
 84        util.download_source(path=zip_path, url=URL[split], download=download, checksum=CHECKSUM[split])
 85        util.unzip(zip_path=zip_path, dst=root)
 86
 87    return target_dir
 88
 89
 90def get_neurips_cellseg_paths(
 91    root: Union[os.PathLike, str], split: str, download: bool = False
 92) -> Tuple[List[str], List[str]]:
 93    f"""Get paths to NeurIPS CellSeg Challenge data.
 94
 95    Args:
 96        root: Filepath to a folder where the downloaded data will be saved.
 97        split: The data split to download. Available splits are:
 98            {', '.join(URL.keys())}
 99        download: Whether to download the data if it is not present.
100
101    Returns:
102        List of filepaths for the image data.
103        List of filepaths for the label data.
104    """
105    path = get_neurips_cellseg_data(root, split, download)
106
107    image_folder = os.path.join(path, "images")
108    assert os.path.exists(image_folder)
109    label_folder = os.path.join(path, "labels")
110    assert os.path.exists(label_folder)
111
112    all_image_paths = glob(os.path.join(image_folder, "*"))
113    all_image_paths.sort()
114    all_label_paths = glob(os.path.join(label_folder, "*"))
115    all_label_paths.sort()
116    assert len(all_image_paths) == len(all_label_paths)
117
118    return all_image_paths, all_label_paths
119
120
def get_neurips_cellseg_supervised_dataset(
    root: Union[str, os.PathLike],
    split: str,
    patch_shape: Tuple[int, int],
    make_rgb: bool = True,
    label_transform: Optional[Any] = None,
    label_transform2: Optional[Any] = None,
    raw_transform: Optional[Any] = None,
    transform: Optional[Any] = None,
    label_dtype: torch.dtype = torch.float32,
    n_samples: Optional[int] = None,
    sampler: Optional[Any] = None,
    download: bool = False,
) -> Dataset:
    """Get the dataset for cell segmentation from the NeurIPS Cell Seg Challenge.

    Args:
        root: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Has to be one of 'train', 'val' or 'test'.
        patch_shape: The patch shape to use for training.
        make_rgb: Whether to map all data to RGB or treat it as grayscale.
            Only has an effect if `raw_transform` is not given.
        label_transform: Transformation of labels, applied before data augmentation.
        label_transform2: Transformation of labels, applied after data augmentation.
        raw_transform: Transformation of the raw data. By default it is created with
            `torch_em.transform.get_raw_transform`.
        transform: Transformation applied to raw and label data. By default it is
            created with `torch_em.transform.get_augmentations(ndim=2)`.
        label_dtype: The data type of the label data.
        n_samples: Number of samples per epoch from this dataset.
        sampler: Sampler for rejecting batches.
        download: Whether to download the data if it is not present.

    Returns:
        The segmentation dataset.

    Raises:
        AssertionError: If `split` is not one of 'train', 'val' or 'test'.
    """
    # NOTE: The docstring must be a plain string literal. An f-string in that position
    # is an ordinary expression and is not stored in `__doc__`.
    assert split in ("train", "val", "test"), split
    image_paths, label_paths = get_neurips_cellseg_paths(root, split, download)

    if raw_transform is None:
        trafo = to_rgb if make_rgb else None
        raw_transform = torch_em.transform.get_raw_transform(augmentation2=trafo)

    if transform is None:
        transform = torch_em.transform.get_augmentations(ndim=2)

    return ImageCollectionDataset(
        raw_image_paths=image_paths,
        label_image_paths=label_paths,
        patch_shape=patch_shape,
        raw_transform=raw_transform,
        label_transform=label_transform,
        label_transform2=label_transform2,
        label_dtype=label_dtype,
        transform=transform,
        n_samples=n_samples,
        sampler=sampler
    )
176
177
def get_neurips_cellseg_supervised_loader(
    root: Union[str, os.PathLike],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    make_rgb: bool = True,
    label_transform: Optional[Any] = None,
    label_transform2: Optional[Any] = None,
    raw_transform: Optional[Any] = None,
    transform: Optional[Any] = None,
    label_dtype: torch.dtype = torch.float32,
    n_samples: Optional[Any] = None,
    sampler: Optional[Any] = None,
    download: bool = False,
    **loader_kwargs
) -> DataLoader:
    """Get the data loader for cell segmentation from the NeurIPS Cell Seg Challenge.

    Args:
        root: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Has to be one of 'train', 'val' or 'test'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        make_rgb: Whether to map all data to RGB or treat it as grayscale.
        label_transform: Transformation of labels, applied before data augmentation.
        label_transform2: Transformation of labels, applied after data augmentation.
        raw_transform: Transformation of the raw data.
        transform: Transformation applied to raw and label data.
        label_dtype: The data type of the label data.
        n_samples: Number of samples per epoch from this dataset.
        sampler: Sampler for rejecting batches.
        download: Whether to download the data if it is not present.
        loader_kwargs: Keyword arguments for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # NOTE: The docstring must be a plain string literal. An f-string in that position
    # is an ordinary expression and is not stored in `__doc__`.
    ds = get_neurips_cellseg_supervised_dataset(
        root=root,
        split=split,
        patch_shape=patch_shape,
        make_rgb=make_rgb,
        label_transform=label_transform,
        label_transform2=label_transform2,
        raw_transform=raw_transform,
        transform=transform,
        label_dtype=label_dtype,
        n_samples=n_samples,
        sampler=sampler,
        download=download
    )
    return torch_em.segmentation.get_data_loader(ds, batch_size, **loader_kwargs)
231
232
def _get_image_paths(root, download):
    """Return the sorted paths of all entries in the folder of the 'unlabeled' split."""
    data_dir = get_neurips_cellseg_data(root, "unlabeled", download)
    return sorted(glob(os.path.join(data_dir, "*")))
238
239
def _get_wholeslide_paths(root, patch_shape, download):
    """Collect the usable whole-slide images of the 'unlabeled_wsi' split.

    Args:
        root: Folder that holds the downloaded data.
        patch_shape: Patch shape used to count the patches per slide.
        download: Whether to download the data if it is not present.

    Returns:
        The sorted paths of the slides that support memory mapping.
        The summed number of whole patches of `patch_shape` that fit into the
        first two axes of these slides.
    """
    wsi_dir = get_neurips_cellseg_data(root, "unlabeled_wsi", download)
    candidates = sorted(glob(os.path.join(wsi_dir, "*")))

    # one of the whole slides doesn't support memmap which will make it very slow to load
    image_paths = list(filter(torch_em.util.supports_memmap, candidates))
    assert len(image_paths) > 0

    n_samples = 0
    for slide_path in image_paths:
        slide_shape = torch_em.util.load_image(slide_path).shape
        # Every slide has to be 3D with three channels in the last axis.
        assert len(slide_shape) == 3 and slide_shape[-1] == 3
        patches_per_axis = [extent // patch_extent for extent, patch_extent in zip(slide_shape[:2], patch_shape)]
        n_samples += np.prod(patches_per_axis)

    return image_paths, n_samples
257
258
def get_neurips_cellseg_unsupervised_dataset(
    root: Union[str, os.PathLike],
    patch_shape: Tuple[int, int],
    make_rgb: bool = True,
    raw_transform: Optional[Any] = None,
    transform: Optional[Any] = None,
    dtype: torch.dtype = torch.float32,
    sampler: Optional[Any] = None,
    use_images: bool = True,
    use_wholeslide: bool = True,
    download: bool = False,
) -> Dataset:
    """Get the dataset of unlabeled images from the NeurIPS Cell Seg Challenge.

    The result concatenates one dataset for the normal images and one for the
    whole-slide images, depending on `use_images` and `use_wholeslide`.

    Args:
        root: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        make_rgb: Whether to map all data to RGB or treat it as grayscale.
            Only has an effect if `raw_transform` is not given.
        raw_transform: Transformation of the raw data.
        transform: Transformation applied to the data for augmentation.
        dtype: The data type of the image data.
        sampler: Sampler for rejecting batches.
        use_images: Whether to use the normal image data.
        use_wholeslide: Whether to use the wholeslide image data.
        download: Whether to download the data if it is not present.

    Returns:
        The concatenated dataset.

    Raises:
        AssertionError: If both `use_images` and `use_wholeslide` are False.
    """
    if raw_transform is None:
        raw_transform = torch_em.transform.get_raw_transform(augmentation2=to_rgb if make_rgb else None)
    if transform is None:
        transform = torch_em.transform.get_augmentations(ndim=2)

    # Settings that are identical for both parts of the dataset.
    shared_kwargs = dict(
        patch_shape=patch_shape, raw_transform=raw_transform, transform=transform, dtype=dtype, sampler=sampler,
    )

    datasets = []
    if use_images:
        plain_paths = _get_image_paths(root, download)
        datasets.append(RawImageCollectionDataset(raw_image_paths=plain_paths, **shared_kwargs))
    if use_wholeslide:
        # The whole-slide part additionally gets an explicit number of samples.
        wsi_paths, n_wsi_samples = _get_wholeslide_paths(root, patch_shape, download)
        datasets.append(
            RawImageCollectionDataset(raw_image_paths=wsi_paths, n_samples=n_wsi_samples, **shared_kwargs)
        )

    assert len(datasets) > 0
    return ConcatDataset(*datasets)
322
323
def get_neurips_cellseg_unsupervised_loader(
    root: Union[str, os.PathLike],
    patch_shape: Tuple[int, int],
    batch_size: int,
    make_rgb: bool = True,
    raw_transform: Optional[Any] = None,
    transform: Optional[Any] = None,
    dtype: torch.dtype = torch.float32,
    sampler: Optional[Any] = None,
    use_images: bool = True,
    use_wholeslide: bool = True,
    download: bool = False,
    **loader_kwargs,
) -> DataLoader:
    """Get the data loader for the unlabeled images from the NeurIPS Cell Seg Challenge.

    Args:
        root: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        make_rgb: Whether to map all data to RGB or treat it as grayscale.
        raw_transform: Transformation of the raw data.
        transform: Transformation applied to the data for augmentation.
        dtype: The data type of the image data.
        sampler: Sampler for rejecting batches.
        use_images: Whether to use the normal image data.
        use_wholeslide: Whether to use the wholeslide image data.
        download: Whether to download the data if it is not present.
        loader_kwargs: Keyword arguments for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset = get_neurips_cellseg_unsupervised_dataset(
        root=root,
        patch_shape=patch_shape,
        make_rgb=make_rgb,
        raw_transform=raw_transform,
        transform=transform,
        dtype=dtype,
        sampler=sampler,
        use_images=use_images,
        use_wholeslide=use_wholeslide,
        download=download,
    )
    return torch_em.segmentation.get_data_loader(dataset, batch_size, **loader_kwargs)
# Download location of the archive of each split.
URL = {
    "train": "https://zenodo.org/records/10719375/files/Training-labeled.zip",
    "val": "https://zenodo.org/records/10719375/files/Tuning.zip",
    "test": "https://zenodo.org/records/10719375/files/Testing.zip",
    "unlabeled": "https://zenodo.org/records/10719375/files/train-unlabeled-part1.zip",
    "unlabeled_wsi": "https://zenodo.org/records/10719375/files/train-unlabeled-part2.zip",
}
# Expected checksum of the archive of each split.
CHECKSUM = {
    "train": "b2383929eb8e99b2716fa0d4e2f6e03983e626a57cf00fe85175869c54aa3592",
    "val": "849423d36bb8fcc2d91a5b189a3b6d93c3d4071c9701eaaa44ba393a510459c4",
    "test": "3379730221f43830d30fddf131750e967c9c9bdf04f98811e852a050eb659ccc",
    "unlabeled": "390b38b398b05e9e5306a024a3bd48ab22e49592cfab3c1a119eab3636b38e0d",
    "unlabeled_wsi": "d1e68eba2918305eab8b846e7578ac14683de970e3fa6a7c2a4a55753be56204",
}
# Folder (relative to the download root) in which the data of each split is looked up.
DIR_NAMES = {
    "train": "Training-labeled",
    "val": "Tuning",
    "test": "Testing/Public",
    "unlabeled": "release-part1",
    "unlabeled_wsi": "train-unlabeled-part2",
}
# File name (relative to the download root) under which each archive is stored.
# It is the last component of the corresponding download URL.
ZIP_PATH = {split: url.rsplit("/", 1)[-1] for split, url in URL.items()}
def to_rgb(image):
def to_rgb(image):
    """Bring an image into a channel-first layout with three channels.

    A 2D image is replicated into three identical channels. A 3D image whose last
    axis has length 3 is transposed so that this axis comes first. Any other 3D
    image is passed through unchanged and must already have 3 as its first axis length.

    Args:
        image: The input image, a 2D or 3D array.

    Returns:
        The image as a 3D array whose first axis has length 3.

    Raises:
        AssertionError: If the result is not 3D or its first axis does not have length 3.
    """
    if image.ndim == 2:
        # Replicate the single channel along a new leading axis.
        image = np.stack([image, image, image], axis=0)

    # Deliberately not an `elif`: this check also runs on the result of the branch above.
    channels_last = image.ndim == 3 and image.shape[-1] == 3
    if channels_last:
        image = image.transpose(2, 0, 1)

    assert image.ndim == 3
    assert image.shape[0] == 3, f"{image.shape}"
    return image
def get_neurips_cellseg_data(root: Union[os.PathLike, str], split: str, download: bool) -> str:
def get_neurips_cellseg_data(root: Union[os.PathLike, str], split: str, download: bool) -> str:
    """Download and extract the data of one split of the NeurIPS Cell Seg Challenge.

    The archive is only fetched and unpacked if the folder expected for the split
    does not exist yet inside `root`.

    Args:
        root: Filepath to a folder where the downloaded data will be saved.
        split: The data split to download. Available splits are:
            'train', 'val', 'test', 'unlabeled', 'unlabeled_wsi'.
        download: Whether to download the data if it is not present.

    Returns:
        The filepath to the folder with the data of the requested split.

    Raises:
        KeyError: If `split` is not one of the available splits.
    """
    # NOTE: The docstring must be a plain string literal. An f-string in that position
    # is an ordinary expression and is not stored in `__doc__`.
    os.makedirs(root, exist_ok=True)

    target_dir = os.path.join(root, DIR_NAMES[split])
    zip_path = os.path.join(root, ZIP_PATH[split])

    # An existing target folder is taken as the sign that the split was already extracted.
    if not os.path.exists(target_dir):
        util.download_source(path=zip_path, url=URL[split], download=download, checksum=CHECKSUM[split])
        util.unzip(zip_path=zip_path, dst=root)

    return target_dir
def get_neurips_cellseg_paths( root: Union[os.PathLike, str], split: str, download: bool = False) -> Tuple[List[str], List[str]]:
 91def get_neurips_cellseg_paths(
 92    root: Union[os.PathLike, str], split: str, download: bool = False
 93) -> Tuple[List[str], List[str]]:
 94    f"""Get paths to NeurIPS CellSeg Challenge data.
 95
 96    Args:
 97        root: Filepath to a folder where the downloaded data will be saved.
 98        split: The data split to download. Available splits are:
 99            {', '.join(URL.keys())}
100        download: Whether to download the data if it is not present.
101
102    Returns:
103        List of filepaths for the image data.
104        List of filepaths for the label data.
105    """
106    path = get_neurips_cellseg_data(root, split, download)
107
108    image_folder = os.path.join(path, "images")
109    assert os.path.exists(image_folder)
110    label_folder = os.path.join(path, "labels")
111    assert os.path.exists(label_folder)
112
113    all_image_paths = glob(os.path.join(image_folder, "*"))
114    all_image_paths.sort()
115    all_label_paths = glob(os.path.join(label_folder, "*"))
116    all_label_paths.sort()
117    assert len(all_image_paths) == len(all_label_paths)
118
119    return all_image_paths, all_label_paths
def get_neurips_cellseg_supervised_dataset( root: Union[str, os.PathLike], split: str, patch_shape: Tuple[int, int], make_rgb: bool = True, label_transform: Optional[Any] = None, label_transform2: Optional[Any] = None, raw_transform: Optional[Any] = None, transform: Optional[Any] = None, label_dtype: torch.dtype = torch.float32, n_samples: Optional[int] = None, sampler: Optional[Any] = None, download: bool = False) -> torch.utils.data.dataset.Dataset:
def get_neurips_cellseg_supervised_dataset(
    root: Union[str, os.PathLike],
    split: str,
    patch_shape: Tuple[int, int],
    make_rgb: bool = True,
    label_transform: Optional[Any] = None,
    label_transform2: Optional[Any] = None,
    raw_transform: Optional[Any] = None,
    transform: Optional[Any] = None,
    label_dtype: torch.dtype = torch.float32,
    n_samples: Optional[int] = None,
    sampler: Optional[Any] = None,
    download: bool = False,
) -> Dataset:
    """Get the dataset for cell segmentation from the NeurIPS Cell Seg Challenge.

    Args:
        root: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Has to be one of 'train', 'val' or 'test'.
        patch_shape: The patch shape to use for training.
        make_rgb: Whether to map all data to RGB or treat it as grayscale.
            Only has an effect if `raw_transform` is not given.
        label_transform: Transformation of labels, applied before data augmentation.
        label_transform2: Transformation of labels, applied after data augmentation.
        raw_transform: Transformation of the raw data. By default it is created with
            `torch_em.transform.get_raw_transform`.
        transform: Transformation applied to raw and label data. By default it is
            created with `torch_em.transform.get_augmentations(ndim=2)`.
        label_dtype: The data type of the label data.
        n_samples: Number of samples per epoch from this dataset.
        sampler: Sampler for rejecting batches.
        download: Whether to download the data if it is not present.

    Returns:
        The segmentation dataset.

    Raises:
        AssertionError: If `split` is not one of 'train', 'val' or 'test'.
    """
    # NOTE: The docstring must be a plain string literal. An f-string in that position
    # is an ordinary expression and is not stored in `__doc__`.
    assert split in ("train", "val", "test"), split
    image_paths, label_paths = get_neurips_cellseg_paths(root, split, download)

    if raw_transform is None:
        trafo = to_rgb if make_rgb else None
        raw_transform = torch_em.transform.get_raw_transform(augmentation2=trafo)

    if transform is None:
        transform = torch_em.transform.get_augmentations(ndim=2)

    return ImageCollectionDataset(
        raw_image_paths=image_paths,
        label_image_paths=label_paths,
        patch_shape=patch_shape,
        raw_transform=raw_transform,
        label_transform=label_transform,
        label_transform2=label_transform2,
        label_dtype=label_dtype,
        transform=transform,
        n_samples=n_samples,
        sampler=sampler
    )
def get_neurips_cellseg_supervised_loader( root: Union[str, os.PathLike], split: str, patch_shape: Tuple[int, int], batch_size: int, make_rgb: bool = True, label_transform: Optional[Any] = None, label_transform2: Optional[Any] = None, raw_transform: Optional[Any] = None, transform: Optional[Any] = None, label_dtype: torch.dtype = torch.float32, n_samples: Optional[Any] = None, sampler: Optional[Any] = None, download: bool = False, **loader_kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_neurips_cellseg_supervised_loader(
    root: Union[str, os.PathLike],
    split: str,
    patch_shape: Tuple[int, int],
    batch_size: int,
    make_rgb: bool = True,
    label_transform: Optional[Any] = None,
    label_transform2: Optional[Any] = None,
    raw_transform: Optional[Any] = None,
    transform: Optional[Any] = None,
    label_dtype: torch.dtype = torch.float32,
    n_samples: Optional[Any] = None,
    sampler: Optional[Any] = None,
    download: bool = False,
    **loader_kwargs
) -> DataLoader:
    """Get the data loader for cell segmentation from the NeurIPS Cell Seg Challenge.

    Args:
        root: Filepath to a folder where the downloaded data will be saved.
        split: The data split to use. Has to be one of 'train', 'val' or 'test'.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        make_rgb: Whether to map all data to RGB or treat it as grayscale.
        label_transform: Transformation of labels, applied before data augmentation.
        label_transform2: Transformation of labels, applied after data augmentation.
        raw_transform: Transformation of the raw data.
        transform: Transformation applied to raw and label data.
        label_dtype: The data type of the label data.
        n_samples: Number of samples per epoch from this dataset.
        sampler: Sampler for rejecting batches.
        download: Whether to download the data if it is not present.
        loader_kwargs: Keyword arguments for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # NOTE: The docstring must be a plain string literal. An f-string in that position
    # is an ordinary expression and is not stored in `__doc__`.
    ds = get_neurips_cellseg_supervised_dataset(
        root=root,
        split=split,
        patch_shape=patch_shape,
        make_rgb=make_rgb,
        label_transform=label_transform,
        label_transform2=label_transform2,
        raw_transform=raw_transform,
        transform=transform,
        label_dtype=label_dtype,
        n_samples=n_samples,
        sampler=sampler,
        download=download
    )
    return torch_em.segmentation.get_data_loader(ds, batch_size, **loader_kwargs)
def get_neurips_cellseg_unsupervised_dataset( root: Union[str, os.PathLike], patch_shape: Tuple[int, int], make_rgb: bool = True, raw_transform: Optional[Any] = None, transform: Optional[Any] = None, dtype: torch.dtype = torch.float32, sampler: Optional[Any] = None, use_images: bool = True, use_wholeslide: bool = True, download: bool = False) -> torch.utils.data.dataset.Dataset:
def get_neurips_cellseg_unsupervised_dataset(
    root: Union[str, os.PathLike],
    patch_shape: Tuple[int, int],
    make_rgb: bool = True,
    raw_transform: Optional[Any] = None,
    transform: Optional[Any] = None,
    dtype: torch.dtype = torch.float32,
    sampler: Optional[Any] = None,
    use_images: bool = True,
    use_wholeslide: bool = True,
    download: bool = False,
) -> Dataset:
    """Get the dataset of unlabeled images from the NeurIPS Cell Seg Challenge.

    The result concatenates one dataset for the normal images and one for the
    whole-slide images, depending on `use_images` and `use_wholeslide`.

    Args:
        root: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        make_rgb: Whether to map all data to RGB or treat it as grayscale.
            Only has an effect if `raw_transform` is not given.
        raw_transform: Transformation of the raw data.
        transform: Transformation applied to the data for augmentation.
        dtype: The data type of the image data.
        sampler: Sampler for rejecting batches.
        use_images: Whether to use the normal image data.
        use_wholeslide: Whether to use the wholeslide image data.
        download: Whether to download the data if it is not present.

    Returns:
        The concatenated dataset.

    Raises:
        AssertionError: If both `use_images` and `use_wholeslide` are False.
    """
    if raw_transform is None:
        raw_transform = torch_em.transform.get_raw_transform(augmentation2=to_rgb if make_rgb else None)
    if transform is None:
        transform = torch_em.transform.get_augmentations(ndim=2)

    # Settings that are identical for both parts of the dataset.
    shared_kwargs = dict(
        patch_shape=patch_shape, raw_transform=raw_transform, transform=transform, dtype=dtype, sampler=sampler,
    )

    datasets = []
    if use_images:
        plain_paths = _get_image_paths(root, download)
        datasets.append(RawImageCollectionDataset(raw_image_paths=plain_paths, **shared_kwargs))
    if use_wholeslide:
        # The whole-slide part additionally gets an explicit number of samples.
        wsi_paths, n_wsi_samples = _get_wholeslide_paths(root, patch_shape, download)
        datasets.append(
            RawImageCollectionDataset(raw_image_paths=wsi_paths, n_samples=n_wsi_samples, **shared_kwargs)
        )

    assert len(datasets) > 0
    return ConcatDataset(*datasets)

Get the unsupervised dataset from the NeurIPS Cell Seg Challenge.

Arguments:
  • root: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • make_rgb: Whether to map all data to RGB or treat it as grayscale.
  • raw_transform: Transformation of the raw data.
  • transform: Transformation applied to raw and label data.
  • dtype: The data type of the image data.
  • sampler: Sampler for rejecting batches.
  • use_images: Whether to use the normal image data.
  • use_wholeslide: Whether to use the wholeslide image data.
  • download: Whether to download the data if it is not present.
Returns:

The segmentation dataset.

def get_neurips_cellseg_unsupervised_loader( root: Union[str, os.PathLike], patch_shape: Tuple[int, int], batch_size: int, make_rgb: bool = True, raw_transform: Optional[Any] = None, transform: Optional[Any] = None, dtype: torch.dtype = torch.float32, sampler: Optional[Any] = None, use_images: bool = True, use_wholeslide: bool = True, download: bool = False, **loader_kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_neurips_cellseg_unsupervised_loader(
    root: Union[str, os.PathLike],
    patch_shape: Tuple[int, int],
    batch_size: int,
    make_rgb: bool = True,
    raw_transform: Optional[Any] = None,
    transform: Optional[Any] = None,
    dtype: torch.dtype = torch.float32,
    sampler: Optional[Any] = None,
    use_images: bool = True,
    use_wholeslide: bool = True,
    download: bool = False,
    **loader_kwargs,
) -> DataLoader:
    """Get the data loader for the unlabeled images from the NeurIPS Cell Seg Challenge.

    Args:
        root: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        batch_size: The batch size for training.
        make_rgb: Whether to map all data to RGB or treat it as grayscale.
        raw_transform: Transformation of the raw data.
        transform: Transformation applied to the data for augmentation.
        dtype: The data type of the image data.
        sampler: Sampler for rejecting batches.
        use_images: Whether to use the normal image data.
        use_wholeslide: Whether to use the wholeslide image data.
        download: Whether to download the data if it is not present.
        loader_kwargs: Keyword arguments for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset = get_neurips_cellseg_unsupervised_dataset(
        root=root,
        patch_shape=patch_shape,
        make_rgb=make_rgb,
        raw_transform=raw_transform,
        transform=transform,
        dtype=dtype,
        sampler=sampler,
        use_images=use_images,
        use_wholeslide=use_wholeslide,
        download=download,
    )
    return torch_em.segmentation.get_data_loader(dataset, batch_size, **loader_kwargs)

Get the data loader for the unsupervised dataset from the NeurIPS Cell Seg Challenge.

Arguments:
  • root: Filepath to a folder where the downloaded data will be saved.
  • patch_shape: The patch shape to use for training.
  • batch_size: The batch size for training.
  • make_rgb: Whether to map all data to RGB or treat it as grayscale.
  • raw_transform: Transformation of the raw data.
  • transform: Transformation applied to raw and label data.
  • dtype: The data type of the image data.
  • sampler: Sampler for rejecting batches.
  • use_images: Whether to use the normal image data.
  • use_wholeslide: Whether to use the wholeslide image data.
  • download: Whether to download the data if it is not present.
  • loader_kwargs: Keyword arguments for the PyTorch DataLoader.
Returns:

The DataLoader.