torch_em.data.datasets.medical.isles

The ISLES dataset contains annotations for ischemic stroke lesion segmentation in multimodal brain MRI scans.

The database is located at https://doi.org/10.5281/zenodo.7960856. This dataset is from the ISLES 2022 Challenge - https://doi.org/10.1038/s41597-022-01875-5. Please cite it if you use this dataset for a publication.

  1"""The ISLES dataset contains annotations for ischemic stroke lesion segmentation
  2in multimodal brain MRI scans.
  3
  4The database is located at https://doi.org/10.5281/zenodo.7960856.
  5This dataset is from the ISLES 2022 Challenge - https://doi.org/10.1038/s41597-022-01875-5.
  6Please cite it if you use this dataset for a publication.
  7"""
  8
  9import os
 10from glob import glob
 11from typing import Union, Tuple, Optional, Literal, List
 12
 13from torch.utils.data import Dataset, DataLoader
 14
 15import torch_em
 16
 17from .. import util
 18
 19
 # Zenodo download link for the ISLES-2022 archive.
 20URL = "https://zenodo.org/records/7960856/files/ISLES-2022.zip"
 # Passed as the `checksum` argument of `util.download_source`; presumably the SHA-256 digest of the archive (TODO confirm).
 21CHECKSUM = "f374895e383f725ddd280db41ef36ed975277c33de0e587a631ca7ea7ad45d6b"
 22
 23
 24def get_isles_data(path: Union[os.PathLike, str], download: bool = False) -> str:
 25    """Download the ISLES dataset.
 26
 27    Args:
 28        path: Filepath to a folder where the data is downloaded for further processing.
 29        download: Whether to download the data if it is not present.
 30
 31    Returns:
 32        Filepath where the data is downloaded.
 33    """
 34    data_dir = os.path.join(path, "ISLES-2022")
 35    if os.path.exists(data_dir):
 36        return data_dir
 37
 38    os.makedirs(path, exist_ok=True)
 39
 40    zip_path = os.path.join(path, "ISLES-2022.zip")
 41    util.download_source(path=zip_path, url=URL, download=download, checksum=CHECKSUM)
 42    util.unzip(zip_path=zip_path, dst=path)
 43
 44    return data_dir
 45
 46
 47def get_isles_paths(
 48    path: Union[os.PathLike, str], modality: Optional[Literal["dwi", "adc"]] = None, download: bool = False
 49) -> Tuple[List[str], List[str]]:
 50    """Get paths to the ISLES data.
 51
 52    Args:
 53        path: Filepath to a folder where the data is downloaded for further processing.
 54        modality: The choice of modality for MRIs. Either 'dwi' or 'adc'.
 55        download: Whether to download the data if it is not present.
 56
 57    Returns:
 58        List of filepaths for the image data.
 59        List of filepaths for the label data.
 60    """
 61    data_dir = get_isles_data(path=path, download=download)
 62
 63    gt_paths = sorted(glob(os.path.join(data_dir, "derivatives", "sub-*", "**", "*.nii.gz"), recursive=True))
 64
 65    dwi_paths = sorted(glob(os.path.join(data_dir, "sub-*", "**", "dwi", "*_dwi.nii.gz"), recursive=True))
 66    adc_paths = sorted(glob(os.path.join(data_dir, "sub-*", "**", "dwi", "*_adc.nii.gz"), recursive=True))
 67
 68    if modality is None:
 69        image_paths = [(dwi_path, adc_path) for dwi_path, adc_path in zip(dwi_paths, adc_paths)]
 70    else:
 71        if modality == "dwi":
 72            image_paths = dwi_paths
 73        elif modality == "adc":
 74            image_paths = adc_paths
 75        else:
 76            raise ValueError(f"'{modality}' is not a valid modality.")
 77
 78    return image_paths, gt_paths
 79
 80
 81def get_isles_dataset(
 82    path: Union[os.PathLike, str],
 83    patch_shape: Tuple[int, int],
 84    modality: Optional[Literal["dwi", "adc"]] = None,
 85    download: bool = False,
 86    **kwargs
 87) -> Dataset:
 88    """Get the ISLES dataset for segmentation of ischemic stroke lesion.
 89
 90    Args:
 91        path: Filepath to a folder where the data is downloaded for further processing.
 92        patch_shape: The patch shape to use for training.
 93        modality: The choice of modality for MRIs. Either 'dwi' or 'adc'.
 94        download: Whether to download the data if it is not present.
 95        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 96
 97    Returns:
 98        The segmentation dataset.
 99    """
100    image_paths, gt_paths = get_isles_paths(path, modality, download)
101
102    dataset = torch_em.default_segmentation_dataset(
103        raw_paths=image_paths,
104        raw_key="data",
105        label_paths=gt_paths,
106        label_key="data",
107        patch_shape=patch_shape,
108        with_channels=modality is None,
109        **kwargs
110    )
111    if "sampler" in kwargs:
112        for ds in dataset.datasets:
113            ds.max_sampling_attempts = 5000
114
115    return dataset
116
117
def get_isles_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    modality: Optional[Literal["dwi", "adc"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the ISLES dataloader for ischemic stroke lesion segmentation.

    Args:
        path: Folder where the data is stored or downloaded to.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        modality: MRI modality to load, 'dwi' or 'adc'.
        download: Whether to download the data if it is not present.
        kwargs: Extra keyword arguments; they are split by `util.split_kwargs` into arguments for
            `torch_em.default_segmentation_dataset` and arguments for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    return torch_em.get_data_loader(
        get_isles_dataset(path, patch_shape, modality, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs
    )
# Zenodo download link for the ISLES-2022 archive.
URL = 'https://zenodo.org/records/7960856/files/ISLES-2022.zip'
# Passed as the `checksum` argument of `util.download_source`; presumably the SHA-256 digest of the archive (TODO confirm).
CHECKSUM = 'f374895e383f725ddd280db41ef36ed975277c33de0e587a631ca7ea7ad45d6b'
def get_isles_data(path: Union[os.PathLike, str], download: bool = False) -> str:
def get_isles_data(path: Union[os.PathLike, str], download: bool = False) -> str:
    """Fetch and unpack the ISLES-2022 archive unless it is already present.

    Args:
        path: Folder that holds (or will hold) the downloaded and extracted data.
        download: Forwarded to `util.download_source`; whether the archive may be downloaded if missing.

    Returns:
        The path of the "ISLES-2022" folder inside `path`.
    """
    data_dir = os.path.join(path, "ISLES-2022")

    # Only fetch and extract the archive when the data folder is not there yet.
    if not os.path.exists(data_dir):
        os.makedirs(path, exist_ok=True)
        archive_path = os.path.join(path, "ISLES-2022.zip")
        util.download_source(path=archive_path, url=URL, download=download, checksum=CHECKSUM)
        util.unzip(zip_path=archive_path, dst=path)

    return data_dir

Download the ISLES dataset.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • download: Whether to download the data if it is not present.
Returns:

Filepath where the data is downloaded.

def get_isles_paths( path: Union[os.PathLike, str], modality: Optional[Literal['dwi', 'adc']] = None, download: bool = False) -> Tuple[List[str], List[str]]:
def get_isles_paths(
    path: Union[os.PathLike, str], modality: Optional[Literal["dwi", "adc"]] = None, download: bool = False
) -> Tuple[List[str], List[str]]:
    """Get paths to the ISLES data.

    Args:
        path: Filepath to a folder where the data is downloaded for further processing.
        modality: The choice of modality for MRIs. Either 'dwi' or 'adc'.
            If None, both modalities are returned, paired per volume as `(dwi_path, adc_path)` tuples.
        download: Whether to download the data if it is not present.

    Returns:
        List of filepaths for the image data (a list of `(dwi_path, adc_path)` tuples if `modality` is None).
        List of filepaths for the label data.

    Raises:
        ValueError: If `modality` is not None, 'dwi' or 'adc', or if the number of DWI and ADC
            volumes found differs when both modalities are requested.
    """
    # Validate first, so that an invalid choice fails before any download or extraction is triggered.
    if modality not in (None, "dwi", "adc"):
        raise ValueError(f"'{modality}' is not a valid modality.")

    data_dir = get_isles_data(path=path, download=download)

    gt_paths = sorted(glob(os.path.join(data_dir, "derivatives", "sub-*", "**", "*.nii.gz"), recursive=True))

    dwi_paths = sorted(glob(os.path.join(data_dir, "sub-*", "**", "dwi", "*_dwi.nii.gz"), recursive=True))
    adc_paths = sorted(glob(os.path.join(data_dir, "sub-*", "**", "dwi", "*_adc.nii.gz"), recursive=True))

    if modality == "dwi":
        return dwi_paths, gt_paths
    if modality == "adc":
        return adc_paths, gt_paths

    # `zip` would silently drop the surplus entries of the longer list,
    # so a mismatch in the number of volumes is reported explicitly instead.
    if len(dwi_paths) != len(adc_paths):
        raise ValueError(
            f"Found {len(dwi_paths)} DWI volumes but {len(adc_paths)} ADC volumes in '{data_dir}'."
        )

    image_paths = list(zip(dwi_paths, adc_paths))
    return image_paths, gt_paths

Get paths to the ISLES data.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • modality: The choice of modality for MRIs. Either 'dwi' or 'adc'. If None (the default), the DWI and ADC paths are returned together as (dwi, adc) pairs.
  • download: Whether to download the data if it is not present.
Returns:

List of filepaths for the image data. List of filepaths for the label data.

def get_isles_dataset( path: Union[os.PathLike, str], patch_shape: Tuple[int, int], modality: Optional[Literal['dwi', 'adc']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataset.Dataset:
 82def get_isles_dataset(
 83    path: Union[os.PathLike, str],
 84    patch_shape: Tuple[int, int],
 85    modality: Optional[Literal["dwi", "adc"]] = None,
 86    download: bool = False,
 87    **kwargs
 88) -> Dataset:
 89    """Get the ISLES dataset for segmentation of ischemic stroke lesion.
 90
 91    Args:
 92        path: Filepath to a folder where the data is downloaded for further processing.
 93        patch_shape: The patch shape to use for training.
 94        modality: The choice of modality for MRIs. Either 'dwi' or 'adc'.
 95        download: Whether to download the data if it is not present.
 96        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 97
 98    Returns:
 99        The segmentation dataset.
100    """
101    image_paths, gt_paths = get_isles_paths(path, modality, download)
102
103    dataset = torch_em.default_segmentation_dataset(
104        raw_paths=image_paths,
105        raw_key="data",
106        label_paths=gt_paths,
107        label_key="data",
108        patch_shape=patch_shape,
109        with_channels=modality is None,
110        **kwargs
111    )
112    if "sampler" in kwargs:
113        for ds in dataset.datasets:
114            ds.max_sampling_attempts = 5000
115
116    return dataset

Get the ISLES dataset for segmentation of ischemic stroke lesion.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • patch_shape: The patch shape to use for training.
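  • modality: The choice of modality for MRIs. Either 'dwi' or 'adc'. If None (the default), the DWI and ADC images are loaded together and the dataset is created with with_channels=True.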
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset.
Returns:

The segmentation dataset.

def get_isles_loader( path: Union[os.PathLike, str], batch_size: int, patch_shape: Tuple[int, int], modality: Optional[Literal['dwi', 'adc']] = None, download: bool = False, **kwargs) -> torch.utils.data.dataloader.DataLoader:
def get_isles_loader(
    path: Union[os.PathLike, str],
    batch_size: int,
    patch_shape: Tuple[int, int],
    modality: Optional[Literal["dwi", "adc"]] = None,
    download: bool = False,
    **kwargs
) -> DataLoader:
    """Build the ISLES dataloader for ischemic stroke lesion segmentation.

    Args:
        path: Folder where the data is stored or downloaded to.
        batch_size: The batch size for training.
        patch_shape: The patch shape to use for training.
        modality: MRI modality to load, 'dwi' or 'adc'.
        download: Whether to download the data if it is not present.
        kwargs: Extra keyword arguments; they are split by `util.split_kwargs` into arguments for
            `torch_em.default_segmentation_dataset` and arguments for the PyTorch DataLoader.

    Returns:
        The DataLoader.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    return torch_em.get_data_loader(
        get_isles_dataset(path, patch_shape, modality, download, **dataset_kwargs),
        batch_size,
        **dataloader_kwargs
    )

Get the ISLES dataloader for segmentation of ischemic stroke lesion.

Arguments:
  • path: Filepath to a folder where the data is downloaded for further processing.
  • batch_size: The batch size for training.
  • patch_shape: The patch shape to use for training.
  • modality: The choice of modality for MRIs. Either 'dwi' or 'adc'. If None (the default), the DWI and ADC images are loaded together.
  • download: Whether to download the data if it is not present.
  • kwargs: Additional keyword arguments for torch_em.default_segmentation_dataset or for the PyTorch DataLoader.
Returns:

The DataLoader.