Source code for InnerEye.ML.utils.hdf5_util

#  ------------------------------------------------------------------------------------------
#  Copyright (c) Microsoft Corporation. All rights reserved.
#  Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
#  ------------------------------------------------------------------------------------------
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Optional, Type, TypeVar, Union

import h5py
import numpy as np

from InnerEye.ML.utils.image_util import ImageDataType

# strptime pattern that acquisition date strings are parsed with (for example "2020-01-31T13:45:00").
DATE_FORMAT = "%Y-%m-%dT%H:%M:%S"


class HDF5Field(Enum):
    """
    Names of the datasets that an HDF5 file is expected to contain.
    The value of each member is the dataset name as it is stored in the file.
    """
    # Identifier of the patient.
    PATIENT_ID = "id"
    # Date at which the volume was acquired.
    DATE = "acquisition_date"
    # The image data.
    VOLUME = "volume"
    # The segmentation (label) data belonging to the volume.
    SEGMENTATION = "segmentation"
class HDF5ImageDataType(Enum):
    """
    Data type of medical image data (e.g. masks and labels).
    Segmentation label maps (SEGMENTATION) are one-hot encoded.

    NOTE: Enum members that share a value are aliases of the first member defined with that value.
    THICKNESS and VESSELS are both np.float32, so VESSELS is an alias of THICKNESS (or of an earlier
    member, should one of the ImageDataType values also be np.float32). Aliases are skipped when
    iterating over the enum.
    """
    # The first three members re-use the data types declared in ImageDataType.
    IMAGE = ImageDataType.IMAGE.value
    SEGMENTATION = ImageDataType.SEGMENTATION.value
    MASK = ImageDataType.MASK.value
    # Data types that are specific to HDF5 data.
    THICKNESS = np.float32
    VESSELS = np.float32
    QUANTITY = np.float64
# Type variable bound to HDF5Object: lets HDF5Object.from_file be annotated as returning an instance
# of whichever class (HDF5Object or a subclass) it is called on.
T = TypeVar('T', bound='HDF5Object')
class HDF5Object:
    """
    In-memory contents of an HDF5 file. Each of volume (images), segmentation (labels),
    acquisition date and patient ID must be provided.
    """

    def __init__(self,
                 patient_id: str,
                 volume: np.ndarray,
                 acquisition_date: Union[str, datetime],
                 segmentation: Optional[np.ndarray]) -> None:
        """
        :param patient_id: The id of the patient
        :param volume: the image for this patient
        :param acquisition_date: either a datetime object, or a string in the format DATE_FORMAT
        :param segmentation: the segmentation maps for the volume, or None
        :raises ValueError: if acquisition_date is not a datetime and cannot be parsed with DATE_FORMAT
        """
        self.patient_id = patient_id
        self.volume = volume
        self.segmentation = segmentation

        parsed_date: Optional[datetime]
        if isinstance(acquisition_date, datetime):
            parsed_date = acquisition_date
        else:
            parsed_date = HDF5Object.parse_acquisition_date(acquisition_date)
        if parsed_date is None:
            raise ValueError(
                f"Stored acquisition date is not ISO601 format {DATE_FORMAT} - found {acquisition_date}")
        self.acquisition_date = parsed_date

    @staticmethod
    def parse_acquisition_date(date: str) -> Optional[datetime]:
        """
        Converts a string representing a date to a datetime object

        :param date: string representing a date, expected to be in the format DATE_FORMAT
        :return: converted date, None if the argument is invalid for date conversion.
        """
        try:
            return datetime.strptime(date, DATE_FORMAT)
        except (TypeError, ValueError):
            # ValueError: the string does not match DATE_FORMAT. TypeError: the argument is not a string.
            # Anything else (for example KeyboardInterrupt) is deliberately not swallowed.
            return None

    @staticmethod
    def _decode_text(value: Union[str, bytes]) -> str:
        """
        Returns the given value as a string. h5py (version 3 and later) returns the contents of
        string datasets as bytes, older versions can return str.

        :param value: the value that was read from a string dataset
        :return: the value itself if it is not a bytes object, otherwise the UTF-8 decoded bytes
        """
        if isinstance(value, bytes):
            return value.decode("utf-8")
        return value

    @staticmethod
    def _hdf5_data_path(data_field: HDF5Field) -> str:
        """
        Gets the path of a dataset inside of the HDF5 file: all datasets live directly under the root group.

        :param data_field: field of the hdf5 file containing the data
        :return: the root path "/" followed by the name of the dataset
        """
        root_path = "/"
        return root_path + data_field.value

    @staticmethod
    def _load_image(hdf5_data: h5py.File, data_field: HDF5Field) -> np.ndarray:
        """
        Load an image (volume or segmentation) from the HDF5 file.

        :param hdf5_data: the opened hdf5 file
        :param data_field: field of the hdf5 file containing the data
        :return: image as numpy array, with the channel dimension removed
        :raises ValueError: if the stored image is not 4D, or does not have exactly 1 channel
        """
        # [()] reads the full dataset into memory. Expected layout: N x C x H x W
        img = hdf5_data[HDF5Object._hdf5_data_path(data_field)][()]
        # ensure a 4D image is loaded
        if img.ndim != 4:
            raise ValueError(f"The loaded image should be 4D (image.shape: {img.shape})")
        n_channels = img.shape[1]
        if n_channels != 1:
            raise ValueError(f"Expected number of channels to be 1 but instead found {n_channels}")
        # squeeze channels dim (C == 1) - return N x H x W
        return np.squeeze(img, axis=1)

    @classmethod
    def from_file(cls: Type[T], hdf5_path: Path, load_segmentation: bool) -> T:
        """
        Load HDF5 object from file

        :param hdf5_path: Path to an HDF5 file
        :param load_segmentation: If True, the segmentation is loaded from the same file as the image.
            If False, the segmentation of the returned object is None.
        :return: HDF5 object
        :raises ValueError: if the file does not contain all datasets listed in HDF5Field, if an image
            does not have the expected shape, or if the acquisition date cannot be parsed.
        """
        expected_keys = {k.value for k in HDF5Field}
        # The context manager closes the file handle, also when validation fails. All values are read
        # into memory via [()] inside of the block, hence nothing refers to the file after it is closed.
        with h5py.File(str(hdf5_path), 'r') as hdf5_data:
            act_keys = list(hdf5_data.keys())
            if not expected_keys.issubset(act_keys):
                raise ValueError(f"HDF5 group should at least have the datasets: {expected_keys} but found {act_keys}")
            patient_id = cls._decode_text(hdf5_data[cls._hdf5_data_path(HDF5Field.PATIENT_ID)][()])
            volume = cls._load_image(hdf5_data, HDF5Field.VOLUME)
            segmentation = cls._load_image(hdf5_data, HDF5Field.SEGMENTATION) if load_segmentation else None
            acquisition_date = cls._decode_text(hdf5_data[cls._hdf5_data_path(HDF5Field.DATE)][()])

        return cls(patient_id=patient_id,
                   volume=volume,
                   segmentation=segmentation,
                   acquisition_date=acquisition_date)