Source code for afmformats.formats.fmt_tab

import json
import pathlib

import numpy as np

from ..afm_data import column_dtypes, known_columns


# Names exported by a star-import of this module; only the loader is listed.
__all__ = ["load_tab"]


def detect_tab(path, max_header=1000):
    """Test whether `path` is in the tab format

    A file is accepted when, within the first `max_header` lines, a
    "# BEGIN METADATA" line and a subsequent "# END METADATA" line are
    found before the first data line, and that first data line (the
    first line that is neither empty nor a comment) contains at least
    one tab character.

    Parameters
    ----------
    path: pathlib.Path
        file to inspect; it is opened in text mode via ``path.open()``
    max_header: int
        maximum number of lines that are read while searching for the
        first data line

    Returns
    -------
    is_tab: bool
        True if the metadata block and a tab-separated data line were
        found, False otherwise
    """
    has_begin = False
    has_data = False
    has_end = False
    with path.open() as fd:
        for _ in range(max_header):
            raw = fd.readline()
            if not raw:
                # end of file reached; there is nothing left to inspect
                break
            line = raw.strip()
            if line.startswith("# BEGIN METADATA"):
                has_begin = True
            elif line.startswith("# END METADATA") and has_begin:
                has_end = True
            elif not line or line.startswith("#"):
                # blank line or other comment (e.g. metadata, header)
                continue
            else:
                # first data line: only checked for being tab-separated,
                # the values themselves are not parsed here
                has_data = "\t" in line
                break
    return has_begin and has_data and has_end


def load_tab(path, callback=None, meta_override=None):
    """Load tab-separated-value files as exported by afmformats

    This is a simple tab-separated values file. The metadata may be
    present at the beginning of the file, commented out, as a json
    dump in a "BEGIN METADATA" - "END METADATA" block. The column data
    is listed below as a simple table. The last commented line before
    the first data line is used as the (tab-separated) column header.

    Parameters
    ----------
    path: str or pathlib.Path
        path to a .tab file
    callback: callable
        function for progress tracking; must accept a float in
        [0, 1] as an argument. It is called once with ``1`` after
        the file has been loaded.
    meta_override: dict
        if specified, contains key-value pairs of metadata that
        are used when loading the files
        (see :data:`afmformats.meta.META_FIELDS`)

    Returns
    -------
    dataset: list of dict
        A list with a single dictionary holding the keys "data"
        (column name -> numpy array; only columns listed in
        `known_columns` are loaded) and "metadata" (the json metadata
        of the file, extended by "path" and "enum" and updated with
        `meta_override`).

    Raises
    ------
    NotImplementedError
        if `meta_override` contains "sensitivity" or "spring constant"
    ValueError
        if the file contains no header line before the data, no data
        at all, or a data line with more values than header columns
    """
    if meta_override is None:
        meta_override = {}
    else:
        # just make sure nobody expects a different result for the forces
        for key in ["sensitivity", "spring constant"]:
            if key in meta_override:
                raise NotImplementedError(
                    f"Setting metadata such as '{key}' is not implemented!")

    path = pathlib.Path(path)
    with path.open() as fd:
        tsvdata = fd.readlines()

    # get the metadata
    dump = []
    injson = False
    for line in tsvdata:
        if line.startswith("# BEGIN METADATA"):
            injson = True
        elif line.startswith("# END METADATA"):
            break
        elif injson:
            dump.append(line.strip("#").strip())
    metadata = json.loads("\n".join(dump)) if dump else {}
    metadata["path"] = path
    metadata["enum"] = 0

    # last line with a hash (before the first data line) is the header
    header_line = None
    for line in tsvdata:
        if not line.strip():
            # empty line
            continue
        if line.startswith("#"):
            # header candidate
            header_line = line
        else:
            # first data line; a header must have been seen by now,
            # regardless of how many blank lines precede the data
            if header_line is None:
                raise ValueError("No header found in '{}'!".format(path))
            break
    else:
        raise ValueError("No data found in '{}'!".format(path))
    columns = header_line.strip("#").strip().split("\t")

    # load the data
    da = [f.strip() for f in tsvdata if f.strip() and not f.startswith("#")]

    # generate arrays
    data = {}
    for cc in columns:
        if cc in known_columns:
            data[cc] = np.zeros(len(da), dtype=column_dtypes[cc])

    for ii, line in enumerate(da):
        items = line.split("\t")
        # explicit check instead of `assert`, which is stripped with -O
        if len(items) > len(columns):
            raise ValueError(
                "Data line {} has more values than header columns "
                "in '{}'!".format(ii, path))
        # rows with fewer values than columns keep zeros in the rest
        for jj, item in enumerate(items):
            cc = columns[jj]
            if cc in known_columns:
                data[cc][ii] = string_to_dtype(item, column_dtypes[cc])

    metadata.update(meta_override)

    dd = {"data": data, "metadata": metadata}

    if callback is not None:
        callback(1)

    return [dd]
def string_to_dtype(astring, dtype):
    """Convert the text of a table cell to `dtype`

    Surrounding whitespace is removed and the literals "True" and
    "False" are mapped to "1" and "0" before the conversion.

    Parameters
    ----------
    astring: str
        text of a single table cell
    dtype: callable
        target type; one of `float`, `int`, or `np.uint8`

    Returns
    -------
    value
        the result of ``dtype(text)``

    Raises
    ------
    ValueError
        if there is no conversion rule for `dtype`
    """
    if dtype not in [float, int, np.uint8]:
        raise ValueError("No conversion rule for dtype '{}'!".format(dtype))
    text = astring.strip()
    # booleans are written by name; translate them to digits first
    text = {"False": "0", "True": "1"}.get(text, text)
    return dtype(text)


# Recipe describing this file format together with its detector and loader
recipe_tab = {
    "descr": "tab-separated values",
    "detect": detect_tab,
    "loader": load_tab,
    "suffix": ".tab",
    "modalities": ["force-distance"],
    "maker": "afmformats",
}