torch_em.data.datasets.light_microscopy.ifnuclei
The IFNuclei dataset contains annotations for nucleus segmentation of immuno and DAPI stained fluorescence images.
This dataset is from the publication https://doi.org/10.1038/s41597-020-00608-w. Please cite it if you use this dataset in your research.
"""The IFNuclei dataset contains annotations for nucleus segmentation
of immuno and DAPI stained fluorescence images.

This dataset is from the publication https://doi.org/10.1038/s41597-020-00608-w.
Please cite it if you use this dataset in your research.
"""

import os
from glob import glob
from natsort import natsorted
from typing import Union, Tuple, List

from torch.utils.data import Dataset, DataLoader

import torch_em

from .. import util


URL = "https://www.ebi.ac.uk/biostudies/files/S-BSST265/dataset.zip"
CHECKSUM = "8285987ed4d57c46a46a55a33c1c085875ea41f429b59cde31d249741aa07ad1"


def get_ifnuclei_data(path: Union[os.PathLike, str], download: bool = False):
    """Download the IFNuclei dataset for nucleus segmentation.

    Returns immediately if the `rawimages` folder already exists inside `path`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.
    """
    data_dir = os.path.join(path, "rawimages")
    if os.path.exists(data_dir):
        return

    os.makedirs(path, exist_ok=True)

    zip_path = os.path.join(path, "dataset.zip")
    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
    util.unzip(zip_path=zip_path, dst=path)


def get_ifnuclei_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the IFNuclei data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If no images are found, or if the number of images and labels differs.
    """
    get_ifnuclei_data(path, download)

    raw_dir = os.path.join(path, "rawimages")
    label_dir = os.path.join(path, "groundtruth")

    raw_paths = natsorted(glob(os.path.join(raw_dir, "*.tif")))
    label_paths = natsorted(glob(os.path.join(label_dir, "*")))

    # Raised explicitly instead of via `assert` so the checks still run under `python -O`.
    # The exception type stays AssertionError for backward compatibility.
    if not raw_paths:
        raise AssertionError(f"No '*.tif' images found in '{raw_dir}'.")
    if len(raw_paths) != len(label_paths):
        raise AssertionError(
            f"Found {len(raw_paths)} images in '{raw_dir}' but {len(label_paths)} labels in '{label_dir}'."
        )

    return raw_paths, label_paths


def get_ifnuclei_dataset(
    path: Union[os.PathLike, str], patch_shape: Tuple[int, int], download: bool = False, **kwargs
) -> Dataset:
    """Get the IFNuclei dataset for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    raw_paths, label_paths = get_ifnuclei_paths(path, download)

    return torch_em.default_segmentation_dataset(
        raw_paths=raw_paths,
        raw_key=None,
        label_paths=label_paths,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
        **kwargs
    )


def get_ifnuclei_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], download: bool = False, **kwargs
) -> DataLoader:
    """Get the IFNuclei dataloader for nucleus segmentation.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_ifnuclei_dataset(path, patch_shape, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
URL =
'https://www.ebi.ac.uk/biostudies/files/S-BSST265/dataset.zip'
CHECKSUM =
'8285987ed4d57c46a46a55a33c1c085875ea41f429b59cde31d249741aa07ad1'
def
get_ifnuclei_data(path: Union[os.PathLike, str], download: bool = False):
def get_ifnuclei_data(path: Union[os.PathLike, str], download: bool = False):
    """Make sure the IFNuclei data is available under `path`.

    Nothing is done when the `rawimages` folder already exists inside `path`.
    Otherwise `dataset.zip` is obtained via `util.download_source` and extracted into `path`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.
    """
    if not os.path.exists(os.path.join(path, "rawimages")):
        os.makedirs(path, exist_ok=True)

        archive_path = os.path.join(path, "dataset.zip")
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive_path, dst=path)
Download the IFNuclei dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
def
get_ifnuclei_paths( path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[int], List[int]]:
def get_ifnuclei_paths(path: Union[os.PathLike, str], download: bool = False) -> Tuple[List[str], List[str]]:
    """Get paths to the IFNuclei data.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data.
        List of filepaths for the label data.

    Raises:
        AssertionError: If no images are found, or if the number of images and labels differs.
    """
    get_ifnuclei_data(path, download)

    raw_dir = os.path.join(path, "rawimages")
    label_dir = os.path.join(path, "groundtruth")

    raw_paths = natsorted(glob(os.path.join(raw_dir, "*.tif")))
    label_paths = natsorted(glob(os.path.join(label_dir, "*")))

    # Raised explicitly instead of via `assert` so the checks still run under `python -O`.
    # The exception type stays AssertionError for backward compatibility.
    if not raw_paths:
        raise AssertionError(f"No '*.tif' images found in '{raw_dir}'.")
    if len(raw_paths) != len(label_paths):
        raise AssertionError(
            f"Found {len(raw_paths)} images in '{raw_dir}' but {len(label_paths)} labels in '{label_dir}'."
        )

    return raw_paths, label_paths
Get paths to the IFNuclei data.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- download: Whether to download the data if it is not present.
Returns:
List of filepaths for the image data. List of filepaths for the label data.
def
get_ifnuclei_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
def get_ifnuclei_dataset(
    path: Union[os.PathLike, str], patch_shape: Tuple[int, int], download: bool = False, **kwargs
) -> Dataset:
    """Build the IFNuclei segmentation dataset.

    The image and label paths come from `get_ifnuclei_paths` and are passed on to
    `torch_em.default_segmentation_dataset` together with any extra keyword arguments.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The segmentation dataset.
    """
    image_files, mask_files = get_ifnuclei_paths(path, download)

    # Fixed settings for this dataset; anything else is supplied by the caller via kwargs.
    fixed_settings = dict(
        raw_paths=image_files,
        raw_key=None,
        label_paths=mask_files,
        label_key=None,
        is_seg_dataset=False,
        patch_shape=patch_shape,
    )
    return torch_em.default_segmentation_dataset(**fixed_settings, **kwargs)
Get the IFNuclei dataset for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
Returns:
The segmentation dataset.
def
get_ifnuclei_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_ifnuclei_loader(
    path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], download: bool = False, **kwargs
) -> DataLoader:
    """Build the IFNuclei dataloader for nucleus segmentation.

    The keyword arguments are divided with `util.split_kwargs` into those for
    `torch_em.default_segmentation_dataset` and the remainder, which goes to
    `torch_em.get_data_loader`.

    Args:
        path: Filepath to a folder where the downloaded data will be saved.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        download: Whether to download the data if it is not present.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_options, loader_options = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    return torch_em.get_data_loader(
        get_ifnuclei_dataset(path, patch_shape, download, **dataset_options),
        batch_size,
        **loader_options,
    )
Get the IFNuclei dataloader for nucleus segmentation.
Arguments:
- path: Filepath to a folder where the downloaded data will be saved.
- batch_size: The batch size for training.
- patch_shape: The patch shape to use for training.
- download: Whether to download the data if it is not present.
- kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset` or for the PyTorch DataLoader.
Returns:
The DataLoader.