Source code for afmformats.formats.fmt_hdf5

import functools
import pathlib

import h5py
import numpy as np

from ..afm_data import column_dtypes, known_columns
from ..meta import IMAGING_MODALITIES


__all__ = ["H5DictReader", "load_hdf5"]


[docs]class H5DictReader(object):
    def __init__(self, path_or_h5, enum_key):
        """Read-only HDF5-based dictionary for arrays

        Parameters
        ----------
        path_or_h5: str or pathlib.Path or h5py.Group
            Path to HDF5 file or an HDF5 group
        enum_key: str
            Name of the subgroup in `path_or_h5` that contains the data
            of the dictionary
        """
        if isinstance(path_or_h5, h5py.Group):
            # we are not responsible for the HDF5 file
            self.path = None
            self.h5 = path_or_h5
        else:
            # we are responsible for closing the HDF5 file
            self.path = path_or_h5
            self.h5 = None
        self.enum_key = enum_key
        self._columns = self.keys()

    def __contains__(self, key):
        return key in self._columns

    def __getitem__(self, key):
        if key not in known_columns:
            raise ValueError("Column '{}' is not documented!".format(key))
        elif key in self.keys():
            if self.path is not None:
                with h5py.File(self.path, "r") as h5:
                    val = np.asarray(h5[self.enum_key][key][:],
                                     dtype=column_dtypes[key])
            else:
                val = np.asarray(self.h5[self.enum_key][key][:],
                                 dtype=column_dtypes[key])
        else:
            raise KeyError("Column '{}' not in '{}/{}'".format(key, self.path,
                                                               self.enum_key))
        return val

    def __iter__(self):
        for kk in self._columns:
            yield kk

    @functools.lru_cache(maxsize=2)
    def keys(self):
        if self.path is not None:
            with h5py.File(self.path, "r") as h5:
                cols = sorted(h5[self.enum_key].keys())
        else:
            cols = sorted(self.h5[self.enum_key].keys())
        return cols


def detect_hdf5(path):
    """Detect HDF5 file format"""
    with h5py.File(path, mode="r") as h5:
        if "software" not in h5.attrs:
            return False
        elif "software version" not in h5.attrs:
            return False
        elif "0" not in h5:
            return False
        elif h5["0"].attrs["imaging mode"] not in IMAGING_MODALITIES:
            return False
        else:
            return True


[docs]def load_hdf5(path_or_h5, callback=None, meta_override=None):
    """Loads HDF5 files as exported by afmformats

    The HDF5 format is self explanatory. The root attributes
    contain the version of afmformats used to create it. For each
    curve, one group is created, named according to "0", "1", ...
    "9", "10", "11", etc. The attributes of each group are key-value
    pairs defined in :const:`afmformats.meta.KEYS_VALID`. The group
    contains datasets named according to
    :const:`afmformats.afm_data.known_columns` and have the attribute
    "unit" with the corresponding value in
    :const:`afmformats.afm_data.column_units`.

    Parameters
    ----------
    path_or_h5: str or pathlib.Path or h5py.Group
        path to HDF5 file or an HDF5 group
    callback: callable
        function for progress tracking; must accept a float in
        [0, 1] as an argument.
    meta_override: dict
        if specified, contains key-value pairs of metadata that
        are used when loading the files
        (see :data:`afmformats.meta.META_FIELDS`)

    Notes
    -----
    In case `path_or_h5` is a h5py.Group object, the
    "path" metadata variable will always be set to the
    path of the original HDF5 file. Keep this in mind
    if you think about storing multiple datasets (each
    containing multiple curves) in one HDF5 file (bad idea).
    """
    if meta_override is None:
        meta_override = {}
    else:
        # just make sure nobody expects a different result for the forces
        for key in ["sensitivity", "spring constant"]:
            if key in meta_override:
                raise NotImplementedError(
                    f"Setting metadata such as '{key}' is not implemented!")

    if isinstance(path_or_h5, h5py.Group):
        path = pathlib.Path(path_or_h5.file.filename)
        close = False
        h5 = path_or_h5
    else:
        path = pathlib.Path(path_or_h5)
        close = True
        h5 = h5py.File(path, "r")
    fdlist = []
    for enum_key in h5.keys():
        metadata = dict(h5[enum_key].attrs)
        metadata["path"] = path
        metadata["enum"] = int(enum_key)
        metadata.update(meta_override)
        data = H5DictReader(path_or_h5, enum_key=enum_key)
        fdlist.append({"data": data,
                       "metadata": metadata})
    if close:
        h5.close()
    if callback is not None:
        callback(1)
    return fdlist


recipe_hdf5 = {
    "descr": "HDF5-based",
    "detect": detect_hdf5,
    "loader": load_hdf5,
    "suffix": ".h5",
    "modalities": ["force-distance"],
    "maker": "afmformats",
}