import xml.etree.ElementTree as ET
import numpy as np
import json
from dylightful.utilities import save_dict, parse_file_path
def get_time_series(pml_path):
    """Extract the per-superfeature time series of a dynophore from a pml file.

    Parses the XML trajectory, saves the Cartesian trajectory/centroids and the
    dense 0/1 time series next to the input file via ``save_dict``.

    Args:
        pml_path (str): path to the pml file containing the Dynophore trajectory

    Returns:
        dict: dense 0/1 time series per superfeature id, plus a "num_frames" key
        (also written to disk as JSON)
    """
    save_path = parse_file_path(pml_path)
    tree = ET.parse(pml_path)
    root = tree.getroot()
    time_series = {}
    cartesian_traj = {}
    centroids = {}
    # Highest frameIndex seen across ALL superfeatures, +1 because counting
    # in python starts at 0. (The old code reset this at each superfeature's
    # first frame, so only the last superfeature determined the final value,
    # and it was unbound when no superfeature had any frame entries.)
    max_index = 0
    for child in root:
        frames = []
        coordinates = []
        centre = None  # robustness: superfeature without any child elements
        for i, attributes in enumerate(child):
            x = float(attributes.get("x3"))
            y = float(attributes.get("y3"))
            z = float(attributes.get("z3"))
            if i == 0:
                # first entry is the superfeature centroid and does not
                # provide frameIndex information
                centre = [x, y, z]
            else:
                frame_idx = int(attributes.get("frameIndex"))
                coordinates.append([x, y, z])
                frames.append(frame_idx)
                if max_index < frame_idx + 1:
                    max_index = frame_idx + 1
        time_series[child.get("id")] = frames
        cartesian_traj[child.get("id")] = coordinates
        centroids[child.get("id")] = centre
    cartesian_full_traj = {
        "centroids": centroids,
        "cartesian": cartesian_traj,
    }
    save_dict(cartesian_full_traj, save_path=save_path, name="cartesian")
    time_series["num_frames"] = max_index
    time_series = rewrites_time_series(time_series)
    save_dict(time_series, save_path=save_path)
    return time_series
def rewrites_time_series(feature_series):
    """Convert frame-index lists into dense 0/1 time series for HMM processing.

    Args:
        feature_series (dict): maps each superfeature id to the list of frame
            indices where that feature is present; the LAST key must be
            "num_frames" holding the total frame count (insertion order is
            relied upon below — ``get_time_series`` guarantees this).

    Returns:
        dict: the same mapping, with each superfeature value replaced by a
        dense list of 0/1 ints of length ``num_frames`` (JSON-serializable).
    """
    max_frames = feature_series["num_frames"]
    keys = list(feature_series.keys())
    # skip the trailing "num_frames" bookkeeping key
    for i in range(len(keys) - 1):
        time_ser_feat = feature_series[keys[i]]
        new_time_ser = np.zeros(int(max_frames))
        try:
            for frame_index in time_ser_feat:
                try:
                    if frame_index < len(new_time_ser):
                        new_time_ser[int(frame_index)] = 1
                        if max_frames < frame_index:
                            # frame numbering turned out to be inconsistent:
                            # grow the frame count and pad the features
                            # parsed so far with one trailing 0 each
                            max_frames = int(
                                frame_index
                            )  # if something with the frame_index is wrong set it here
                            print("superfeature:", keys[i])
                            print("Set max frames to", frame_index)
                            if i > 0:
                                for j in range(i):
                                    print("resetting", keys[j])
                                    tmp = list(feature_series[keys[j]])
                                    tmp += [0]
                                    feature_series[keys[j]] = tmp
                    else:
                        # index beyond current buffer: reallocate with slack
                        tmp = np.zeros(int(frame_index + 50))  # free new memory
                        tmp[: len(new_time_ser)] = new_time_ser
                        tmp[int(frame_index)] = 1
                        new_time_ser = tmp
                except Exception:
                    # was a bare `except:` — narrowed so KeyboardInterrupt /
                    # SystemExit are no longer swallowed; best-effort per frame
                    print(
                        "Error parsing into new time series in superfeature, ",
                        keys[i],
                        "in frame",
                        frame_index,
                        "but the memory was only",
                        len(new_time_ser),
                        "time points",
                    )
                    continue
        except Exception as err:
            # was a bare `except:`; chain the cause for debuggability
            raise RuntimeError(
                "Fatal error while parsing superfeature", keys[i]
            ) from err
        new_time_ser = new_time_ser[:max_frames]
        assert len(new_time_ser) == max_frames, (
            "Lengths of parsed time series does not match the maximum number of frames. Length was "
            + str(len(new_time_ser))
        )
        feature_series[keys[i]] = new_time_ser.astype(np.int32).tolist()
    # debug output kept for behavioral parity: one length per superfeature
    for i in range(len(keys) - 1):
        print(len(feature_series[keys[i]]))
    return feature_series
def get_atom_serials(pml_path):
    """Debug helper: parse the pml file and print its top-level XML elements.

    NOTE(review): despite the name, no atom serials are extracted yet — the
    function only prints each child element of the document root.

    Args:
        pml_path (str): path to the pml file.
    """
    save_path = parse_file_path(pml_path)  # kept for parity; result unused
    document = ET.parse(pml_path)
    for node in document.getroot():
        print(node)
def load_env_partners_mixed(json_path):
    """Load environment-partner occurrence series from a dynophore JSON file,
    keyed by partner name AND superfeature index.

    Args:
        json_path (str): path to the dynophore JSON file (must contain a
            "superfeatures" list whose items hold an "envpartners" list).

    Returns:
        dict: "<partner name>:superFeature<i>" -> that partner's "occurrences"
        entry, so the same residue appearing in several superfeatures gets
        one key per superfeature.
    """
    # `with` already closes the file; the old explicit close() was redundant
    with open(json_path) as json_file:
        json_object = json.load(json_file)
    storage_env_partners = {}
    for i, superfeature in enumerate(json_object["superfeatures"]):
        for env_partner in superfeature["envpartners"]:
            key = env_partner["name"] + ":superFeature" + str(i)
            storage_env_partners[key] = env_partner["occurrences"]
    return storage_env_partners
def load_env_partners(json_path):
    """Load environment-partner occurrence series from a dynophore JSON file,
    grouped by partner name.

    Args:
        json_path (str): path to the dynophore JSON file (must contain a
            "superfeatures" list whose items hold an "envpartners" list).

    Returns:
        dict: partner name -> list of "occurrences" entries, one per
        (superfeature, partner) pair in which the partner appears.
    """
    # `with` already closes the file; the old explicit close() was redundant
    with open(json_path) as json_file:
        json_object = json.load(json_file)
    storage_env_partners = {}
    for superfeature in json_object["superfeatures"]:
        for env_partner in superfeature["envpartners"]:
            # setdefault instead of unconditional re-initialization: the old
            # code reset `storage[name] = []` for every superfeature, wiping
            # occurrences collected for the same partner name in earlier
            # superfeatures
            storage_env_partners.setdefault(env_partner["name"], []).append(
                env_partner["occurrences"]
            )
    return storage_env_partners
if __name__ == "__main__":
    # Ad-hoc manual smoke test; the commented calls below are earlier
    # experiments kept for reference. Paths are relative to this module's
    # location in the repository checkout.
    # get_time_series("../Trajectories/Dominique/1KE7_dynophore.json")
    # get_time_series("../tests/Trajectories/1KE7_dynophore.pml")
    # get_atom_serials(pml_path="../tests/Trajectories/1KE7_dynophore.pml")
    res = load_env_partners(
        json_path="../tests/Trajectories/ZIKV/ZIKV-Pro-427-1_dynophore.json"
    )
    print(res)