# Source code for afmformats.formats.fmt_tab

import json
import pathlib

import numpy as np

from ..afm_data import column_dtypes, known_columns


__all__ = ["load_tab"]


def detect_tab(path, max_header=1000):
    """Test whether `path` is in the tab format

    A file is recognized if, within its first `max_header` lines,
    a "# BEGIN METADATA" line is followed by a "# END METADATA" line
    and the first line that is neither empty nor a comment contains
    at least one tab character.

    Parameters
    ----------
    path: str or pathlib.Path
        path to the file to test
    max_header: int
        maximum number of lines to read before giving up

    Returns
    -------
    is_tab: bool
        True if the file looks like a tab file, False otherwise
    """
    has_begin = False
    has_data = False
    has_end = False
    # coerce so that plain strings are accepted as well
    with pathlib.Path(path).open() as fd:
        for _ in range(max_header):
            raw = fd.readline()
            if not raw:
                # `readline` returns an empty string only at end of file;
                # there is nothing left to inspect
                break
            line = raw.strip()
            if line.startswith("# BEGIN METADATA"):
                has_begin = True
            elif line.startswith("# END METADATA") and has_begin:
                has_end = True
            elif not line or line.startswith("#"):
                # blank line or other comment (e.g. metadata, header)
                continue
            else:
                # first data line: only the presence of a tab separator
                # is checked, the values themselves are not parsed
                has_data = "\t" in line
                break
    return has_begin and has_data and has_end


def load_tab(path, callback=None, meta_override=None):
    """Loads tab-separated-value files as exported by afmformats

    This is a simple tab-separated values files. The metadata
    may be present at the beginning of the file, commented out,
    as a json dump in a "BEGIN METADATA" - "END METADATA" block.
    The column data is listed below as a simple table.

    Parameters
    ----------
    path: str or pathlib.Path
        path to a .tab file
    callback: callable
        function for progress tracking; must accept a float in
        [0, 1] as an argument.
    meta_override: dict
        if specified, contains key-value pairs of metadata that
        are used when loading the files
        (see :data:`afmformats.meta.META_FIELDS`)

    Returns
    -------
    dataset_list: list of dict
        list with a single dictionary holding the column arrays
        under "data" and the metadata under "metadata"

    Raises
    ------
    NotImplementedError
        if `meta_override` contains "sensitivity" or "spring constant"
    ValueError
        if the file has no header line, no data lines, or a data
        line with more values than there are header columns
    """
    if meta_override is None:
        meta_override = {}
    else:
        # just make sure nobody expects a different result for the forces
        for key in ["sensitivity", "spring constant"]:
            if key in meta_override:
                raise NotImplementedError(
                    f"Setting metadata such as '{key}' is not implemented!")

    path = pathlib.Path(path)
    with path.open() as fd:
        tsvdata = fd.readlines()

    # get the metadata (commented-out json between the two markers)
    dump = []
    injson = False
    for line in tsvdata:
        if line.startswith("# BEGIN METADATA"):
            injson = True
        elif line.startswith("# END METADATA"):
            break
        elif injson:
            dump.append(line.strip("#").strip())
    metadata = json.loads("\n".join(dump)) if dump else {}
    metadata["path"] = path
    metadata["enum"] = 0

    # last line with a hash (before the first data line) is the header
    header_line = None
    for line in tsvdata:
        if not line.strip():
            # empty line
            continue
        if line.startswith("#"):
            # header candidate
            header_line = line
        else:
            # first data line; a header must have been seen by now
            # (also when only empty lines precede the data)
            if header_line is None:
                raise ValueError("No header found in '{}'!".format(path))
            break
    else:
        raise ValueError("No data found in '{}'!".format(path))
    columns = header_line.strip("#").strip().split("\t")

    # load the data (everything that is neither empty nor a comment)
    rows = [ln.strip() for ln in tsvdata
            if ln.strip() and not ln.startswith("#")]
    # generate arrays; columns that are not in `known_columns` are ignored
    data = {}
    for cc in columns:
        if cc in known_columns:
            data[cc] = np.zeros(len(rows), dtype=column_dtypes[cc])
    for ii, row in enumerate(rows):
        items = row.split("\t")
        if len(items) > len(columns):
            # explicit check instead of `assert`, which is removed
            # when Python runs with -O
            raise ValueError(
                f"Data row {ii} in '{path}' has {len(items)} values, "
                f"but the header defines only {len(columns)} columns!")
        for cc, item in zip(columns, items):
            if cc in known_columns:
                data[cc][ii] = string_to_dtype(item, column_dtypes[cc])

    # user-defined metadata take precedence over those from the file
    metadata.update(meta_override)
    dd = {"data": data,
          "metadata": metadata}

    if callback is not None:
        callback(1)

    return [dd]


def string_to_dtype(astring, dtype):
    """Convert the text of one table cell to a scalar of type `dtype`

    Surrounding whitespace is removed and the literals "True" and
    "False" are mapped to "1" and "0" before the conversion.

    Parameters
    ----------
    astring: str
        text to convert
    dtype: callable
        target type; one of `float`, `int`, or `numpy.uint8`

    Returns
    -------
    value: float or int or numpy.uint8
        the converted value

    Raises
    ------
    ValueError
        if there is no conversion rule for `dtype` (or if the text
        cannot be parsed by `dtype`)
    """
    text = astring.strip()
    # boolean literals are stored as integers
    text = {"False": "0", "True": "1"}.get(text, text)
    if dtype not in (float, int, np.uint8):
        raise ValueError("No conversion rule for dtype '{}'!".format(dtype))
    return dtype(text)


# Recipe for the tab format: bundles the detection and loading functions
# defined in this module with descriptive information about the format.
# NOTE(review): presumably consumed by a format registry elsewhere in the
# package -- confirm the expected keys there.
recipe_tab = {
    # short description of the format
    "descr": "tab-separated values",
    # function that tests whether a path is in the tab format
    "detect": detect_tab,
    # function that loads the data and metadata from a path
    "loader": load_tab,
    # file suffix associated with this format
    "suffix": ".tab",
    "modalities": ["force-distance"],
    "maker": "afmformats",
}