# Source code for dylightful.parser

import xml.etree.ElementTree as ET
import numpy as np
import json

from dylightful.utilities import save_dict, parse_file_path


def get_time_series(pml_path):
    """Extract the per-superfeature time series of a dynophore from a pml file.

    Every child element of the XML root is treated as one superfeature. Its
    first sub-element supplies the centroid (attributes ``x3``/``y3``/``z3``)
    and carries no ``frameIndex``; every following sub-element supplies one
    ``frameIndex`` together with one ``x3``/``y3``/``z3`` coordinate triple.

    Two dictionaries are handed to ``save_dict`` (presumably written as JSON
    next to the input; confirm against ``dylightful.utilities``):

    * ``{"centroids": ..., "cartesian": ...}`` under the name ``"cartesian"``
    * the time series returned by this function

    Args:
        pml_path (str): path to the pml file containing the Dynophore trajectory

    Returns:
        dict: the result of ``rewrites_time_series`` applied to a mapping of
        superfeature id -> list of frame indices, plus the key
        ``"num_frames"`` (largest ``frameIndex`` over all superfeatures + 1,
        or 0 when the file contains no frame entries).
    """

    save_path = parse_file_path(pml_path)

    root = ET.parse(pml_path).getroot()
    time_series = {}
    cartesian_traj = {}
    centroids = {}
    # Tracked across ALL superfeatures; starting at 0 keeps the name bound
    # even when the file contains no frame entries at all.
    num_frames = 0
    for child in root:
        feature_id = child.get("id")
        # Reset per superfeature so a child without sub-elements does not
        # inherit the centroid of the previous one.
        centre = None
        frames = []
        coordinates = []

        for position, attributes in enumerate(child):
            if position == 0:
                # first entry does not provide frameIndex information
                centre = [
                    float(attributes.get("x3")),
                    float(attributes.get("y3")),
                    float(attributes.get("z3")),
                ]
                continue

            frame_idx = int(attributes.get("frameIndex"))
            coordinates.append(
                [
                    float(attributes.get("x3")),
                    float(attributes.get("y3")),
                    float(attributes.get("z3")),
                ]
            )
            frames.append(frame_idx)
            # counting in python starts at 0, hence the + 1
            num_frames = max(num_frames, frame_idx + 1)

        time_series[feature_id] = frames
        cartesian_traj[feature_id] = coordinates
        centroids[feature_id] = centre

    cartesian_full_traj = {"centroids": centroids, "cartesian": cartesian_traj}
    save_dict(cartesian_full_traj, save_path=save_path, name="cartesian")

    time_series["num_frames"] = num_frames
    time_series = rewrites_time_series(time_series)

    save_dict(time_series, save_path=save_path)
    return time_series


def rewrites_time_series(feature_series):
    """Convert per-superfeature frame-index lists into dense 0/1 time series.

    Every key except ``"num_frames"`` is expected to map to an iterable of
    frame indices in which the superfeature is present. Each of those values
    is replaced in place by a list of ints with a ``1`` at every listed frame
    and ``0`` elsewhere, ready for the HMM processing.

    All resulting series have the same length: the larger of
    ``feature_series["num_frames"]`` and the highest frame index found + 1.
    When the data exceeds the declared frame count, ``"num_frames"`` is
    updated to the new length so the dictionary stays self-consistent.

    Frame indices that cannot be converted to a non-negative integer are
    reported on stdout and skipped.

    Args:
        feature_series (dict): superfeature id -> iterable of frame indices,
            plus the key ``"num_frames"`` holding the declared frame count.

    Returns:
        dict: the same (mutated) dictionary with one dense 0/1 list per
        superfeature.

    Raises:
        RuntimeError: if the value stored for a superfeature is not iterable.
    """

    declared_frames = int(feature_series["num_frames"])
    # Select by name rather than by position so the result does not depend on
    # "num_frames" having been inserted last.
    feature_keys = [key for key in feature_series if key != "num_frames"]

    # Pass 1: validate the indices and find the length every series must have.
    total_frames = declared_frames
    valid_indices = {}
    for key in feature_keys:
        try:
            raw_indices = list(feature_series[key])
        except TypeError as err:
            raise RuntimeError("Fatal error while parsing superfeature", key) from err

        kept = []
        for frame_index in raw_indices:
            try:
                index = int(frame_index)
            except (TypeError, ValueError, OverflowError):
                index = -1
            if index < 0:
                # Negative values would silently wrap around in numpy
                # indexing, so they are rejected together with non-numbers.
                print(
                    "Error parsing into new time series in superfeature, ",
                    key,
                    "in frame",
                    frame_index,
                )
                continue
            kept.append(index)

        valid_indices[key] = kept
        if kept:
            total_frames = max(total_frames, max(kept) + 1)

    if total_frames > declared_frames:
        print("Set max frames to", total_frames)
        feature_series["num_frames"] = total_frames

    # Pass 2: build the dense series, all with the same final length.
    for key in feature_keys:
        dense = np.zeros(total_frames, dtype=np.int32)
        if valid_indices[key]:
            dense[valid_indices[key]] = 1
        feature_series[key] = dense.tolist()
    return feature_series


def get_atom_serials(pml_path):
    """Print every top-level element of a dynophore pml file.

    Despite its name, the function currently only prints the child elements
    of the XML root; it extracts no atom serials and returns nothing.

    Args:
        pml_path (str): path to the pml file containing the Dynophore trajectory
    """

    # The result is not used here. The call is kept because any side effects
    # of parse_file_path are not visible from this module.
    parse_file_path(pml_path)

    for element in ET.parse(pml_path).getroot():
        print(element)


def load_env_partners_mixed(json_path):
    """Collect the occurrences of every environment partner per superfeature.

    The JSON document must contain a ``"superfeatures"`` list whose entries
    each hold an ``"envpartners"`` list; every partner needs a ``"name"`` and
    an ``"occurrences"`` entry.

    Args:
        json_path (str): path to the dynophore JSON file.

    Returns:
        dict: maps ``"<partner name>:superFeature<index>"`` to that partner's
        ``"occurrences"`` value, where ``<index>`` is the position of the
        superfeature in the ``"superfeatures"`` list.
    """

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_path, encoding="utf-8") as json_file:
        dynophore = json.load(json_file)

    storage_env_partners = {}
    for feature_number, superfeature in enumerate(dynophore["superfeatures"]):
        for env_partner in superfeature["envpartners"]:
            key = env_partner["name"] + ":superFeature" + str(feature_number)
            storage_env_partners[key] = env_partner["occurrences"]
    return storage_env_partners


def load_env_partners(json_path):
    """Collect the occurrences of every environment partner across superfeatures.

    The JSON document must contain a ``"superfeatures"`` list whose entries
    each hold an ``"envpartners"`` list; every partner needs a ``"name"`` and
    an ``"occurrences"`` entry.

    Args:
        json_path (str): path to the dynophore JSON file.

    Returns:
        dict: maps each partner name to a list with one ``"occurrences"``
        value per appearance of that partner, in superfeature order.
    """

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_path, encoding="utf-8") as json_file:
        dynophore = json.load(json_file)

    storage_env_partners = {}
    for superfeature in dynophore["superfeatures"]:
        for env_partner in superfeature["envpartners"]:
            # setdefault creates the list only on first sight of a name, so a
            # partner shared by several superfeatures keeps all its entries
            # instead of being reset for every superfeature.
            storage_env_partners.setdefault(env_partner["name"], []).append(
                env_partner["occurrences"]
            )
    return storage_env_partners


if __name__ == "__main__":
    # Manual smoke run against a trajectory from the local test data.
    # Other entry points that have been exercised from here:
    #   get_time_series("../Trajectories/Dominique/1KE7_dynophore.json")
    #   get_time_series("../tests/Trajectories/1KE7_dynophore.pml")
    #   get_atom_serials(pml_path="../tests/Trajectories/1KE7_dynophore.pml")
    example_json = "../tests/Trajectories/ZIKV/ZIKV-Pro-427-1_dynophore.json"
    res = load_env_partners(json_path=example_json)
    print(res)