torch_em.data.datasets.light_microscopy.usiigaci

The Usiigaci dataset contains annotations for cell segmentation in phase contrast microscopy images of NIH/3T3 fibroblasts.

This dataset is from the publication https://doi.org/10.1016/j.softx.2019.02.007. Please cite it if you use this dataset for your research.

  1"""The Usiigaci dataset contains annotations for cell segmentation in
  2phase contrast microscopy images of NIH/3T3 fibroblasts.
  3
  4This dataset is from the publication https://doi.org/10.1016/j.softx.2019.02.007.
  5Please cite it if you use this dataset for your research.
  6"""
  7
  8import os
  9import subprocess
 10from glob import glob
 11from natsort import natsorted
 12from typing import Union, Tuple, Literal, List
 13
 14from torch.utils.data import Dataset, DataLoader
 15
 16import torch_em
 17
 18from .. import util
 19
 20
 21def get_usiigaci_data(path: Union[os.PathLike, str], download: bool = False):
 22    """Download the Usiigaci dataset.
 23
 24    Args:
 25        path: Filepath to a folder where the data is downloaded for further processing.
 26        download: Whether to download the data if it is not present.
 27    """
 28    data_dir = os.path.join(path, "Usiigaci")
 29    if os.path.exists(data_dir):
 30        return
 31
 32    if not download:
 33        raise RuntimeError(f"Cannot find the data at {path}, but download was set to False")
 34
 35    subprocess.run(["git", "clone", "--quiet", "https://github.com/oist/Usiigaci", data_dir])
 36
 37
 38def get_usiigaci_paths(
 39    path: Union[os.PathLike, str], split: Literal['train', 'val'], download: bool = False
 40) -> Tuple[List[str], List[str]]:
 41    """Get paths to the Usiigaci data.
 42
 43    Args:
 44        path: Filepath to a folder where the data is downloaded for further processing.
 45        split: The data split to use. Either 'train' or 'val'.
 46        download: Whether to download the data if it is not present.
 47
 48    Returns:
 49        List of filepaths for the image data.
 50        List of filepaths for the label data.
 51    """
 52    get_usiigaci_data(path, download)
 53
 54    # Labeled images.
 55    base_dir = os.path.join(path, "Usiigaci", r"Mask R-CNN", split, "set*")
 56    raw_paths = natsorted(glob(os.path.join(base_dir, "raw.tif")))
 57    label_paths = natsorted(glob(os.path.join(base_dir, "instances_ids.png")))
 58
 59    if split == "train":
 60        # Example tracking data.
 61        base_dir = os.path.join(path, "Usiigaci", "ExampleData")
 62        raw_paths.extend(natsorted(glob(os.path.join(base_dir, "T98G_sample", "*.tif"))))
 63        label_paths.extend(natsorted(glob(os.path.join(base_dir, "T98G_sample_mask_avg", "*.png"))))
 64
 65    assert len(raw_paths) == len(label_paths)
 66
 67    return raw_paths, label_paths
 68
 69
 70def get_usiigaci_dataset(
 71    path: Union[os.PathLike, str],
 72    patch_shape: Tuple[int, int],
 73    split: Literal['train', 'val'],
 74    download: bool = False,
 75    **kwargs
 76) -> Dataset:
 77    """Get the Usiigaci dataset for cell segmentation.
 78
 79    Args:
 80        path: Filepath to a folder where the data is downloaded for further processing.
 81        patch_shape: The patch shape to use for training.
 82        split: The data split to use. Either 'train' or 'val'.
 83        download: Whether to download the data if it is not present.
 84        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 85
 86    Returns:
 87        The segmentation dataset.
 88    """
 89    raw_paths, label_paths = get_usiigaci_paths(path, split, download)
 90
 91    return torch_em.default_segmentation_dataset(
 92        raw_paths=raw_paths,
 93        raw_key=None,
 94        label_paths=label_paths,
 95        label_key=None,
 96        patch_shape=patch_shape,
 97        **kwargs
 98    )
 99
100
def get_usiigaci_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val'],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the Usiigaci dataloader for cell segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train' or 'val'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining (loader) ones.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    segmentation_dataset = get_usiigaci_dataset(
        path=path, patch_shape=patch_shape, split=split, download=download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(segmentation_dataset, batch_size, **dataloader_kwargs)
    return loader
def get_usiigaci_data(path: Union[os.PathLike, str], download: bool = False):
def get_usiigaci_data(path: Union[os.PathLike, str], download: bool = False) -> None:
    """Download the Usiigaci dataset.

    The data is obtained by cloning https://github.com/oist/Usiigaci into a "Usiigaci" subfolder of `path`.
    If that subfolder already exists, nothing is done.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        download: Whether to download the data if it is not present.

    Raises:
        RuntimeError: If the data is not present and `download` is False.
        subprocess.CalledProcessError: If cloning the repository fails.
    """
    data_dir = os.path.join(path, "Usiigaci")
    if os.path.exists(data_dir):
        return

    if not download:
        raise RuntimeError(f"Cannot find the data at {path}, but download was set to False")

    # check=True surfaces a failed clone right away instead of silently continuing without any data.
    subprocess.run(["git", "clone", "--quiet", "https://github.com/oist/Usiigaci", data_dir], check=True)

Download the Usiigaci dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
def get_usiigaci_paths( path: Union[os.PathLike, str], split: Literal['train', 'val'], download: bool = False) -> Tuple[List[str], List[str]]:
def get_usiigaci_paths(
    path: Union[os.PathLike, str], split: Literal['train', 'val'], download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the Usiigaci data.

    The labeled images are collected from the "Mask R-CNN/<split>/set*" folders of the cloned repository.
    For the 'train' split, the example tracking data ("ExampleData/T98G_sample" and
    "ExampleData/T98G_sample_mask_avg") is appended as well.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        split: The data split to use. Either 'train' or 'val'.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        ValueError: If `split` is neither 'train' nor 'val'.
        RuntimeError: If the number of image and label files does not match.
    """
    # Reject unknown splits before touching the disk or the network; otherwise the
    # glob patterns below would silently match nothing and return empty lists.
    if split not in ("train", "val"):
        raise ValueError(f"'{split}' is not a valid split. Choose either 'train' or 'val'.")

    get_usiigaci_data(path, download)

    # Labeled images.
    base_dir = os.path.join(path, "Usiigaci", "Mask R-CNN", split, "set*")
    raw_paths = natsorted(glob(os.path.join(base_dir, "raw.tif")))
    label_paths = natsorted(glob(os.path.join(base_dir, "instances_ids.png")))

    if split == "train":
        # Example tracking data.
        base_dir = os.path.join(path, "Usiigaci", "ExampleData")
        raw_paths.extend(natsorted(glob(os.path.join(base_dir, "T98G_sample", "*.tif"))))
        label_paths.extend(natsorted(glob(os.path.join(base_dir, "T98G_sample_mask_avg", "*.png"))))

    # Explicit check instead of `assert`, which is stripped when Python runs with -O.
    if len(raw_paths) != len(label_paths):
        raise RuntimeError(
            f"Found {len(raw_paths)} images but {len(label_paths)} labels for the '{split}' split."
        )

    return raw_paths, label_paths

Get paths to the Usiigaci data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • split: The data split to use. Either 'train' or 'val'.
  • download: Whether to download the data if it is not present.
Returns:

  • List of filepaths for the image data.
  • List of filepaths for the label data.

def get_usiigaci_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], split: Literal['train', 'val'], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_usiigaci_dataset(
    path: Union[os.PathLike, str],
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val'],
    download: bool = False,
    **kwargs
) -> Dataset:
    """Create the Usiigaci segmentation dataset.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train' or 'val'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    # Resolve the image and label files for the requested split (downloads on demand).
    image_files, mask_files = get_usiigaci_paths(path=path, split=split, download=download)

    # The keys are None because the inputs are passed as plain file paths.
    dataset = torch_em.default_segmentation_dataset(
        raw_paths=image_files, raw_key=None, label_paths=mask_files, label_key=None, patch_shape=patch_shape, **kwargs
    )
    return dataset

Get the Usiigaci dataset for cell segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train' or 'val'.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_usiigaci_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], split: Literal['train', 'val'], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_usiigaci_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    split: Literal['train', 'val'],
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Create the Usiigaci dataloader for cell segmentation.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        split: The data split to use. Either 'train' or 'val'.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    # Separate the keyword arguments meant for the dataset from the remaining (loader) ones.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)

    segmentation_dataset = get_usiigaci_dataset(
        path=path, patch_shape=patch_shape, split=split, download=download, **dataset_kwargs
    )
    loader = torch_em.get_data_loader(segmentation_dataset, batch_size, **dataloader_kwargs)
    return loader

Get the Usiigaci dataloader for cell segmentation.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • split: The data split to use. Either 'train' or 'val'.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.