#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Thu Oct 17 18:44:59 2019 @author: ljia """ import numpy as np import networkx as nx from tqdm import tqdm import sys from gedlibpy import librariesImport, gedlibpy def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', edit_cost_constant=[], saveGXL='benoit', stabilizer='min', repeat=50): """ Compute GED for 2 graphs. """ if lib == 'gedlibpy': def convertGraph(G): """Convert a graph to the proper NetworkX format that can be recognized by library gedlibpy. """ G_new = nx.Graph() for nd, attrs in G.nodes(data=True): G_new.add_node(str(nd), chem=attrs['atom']) for nd1, nd2, attrs in G.edges(data=True): # G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) G_new.add_edge(str(nd1), str(nd2)) return G_new gedlibpy.restart_env() gedlibpy.add_nx_graph(convertGraph(g1), "") gedlibpy.add_nx_graph(convertGraph(g2), "") listID = gedlibpy.get_all_graph_ids() gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant) gedlibpy.init() gedlibpy.set_method(method, "") gedlibpy.init_method() g = listID[0] h = listID[1] if stabilizer == None: gedlibpy.run_method(g, h) pi_forward = gedlibpy.get_forward_map(g, h) pi_backward = gedlibpy.get_backward_map(g, h) upper = gedlibpy.get_upper_bound(g, h) lower = gedlibpy.get_lower_bound(g, h) elif stabilizer == 'min': upper = np.inf for itr in range(repeat): gedlibpy.run_method(g, h) upper_tmp = gedlibpy.get_upper_bound(g, h) if upper_tmp < upper: upper = upper_tmp pi_forward = gedlibpy.get_forward_map(g, h) pi_backward = gedlibpy.get_backward_map(g, h) lower = gedlibpy.get_lower_bound(g, h) if upper == 0: break dis = upper # make the map label correct (label remove map as np.inf) nodes1 = [n for n in g1.nodes()] nodes2 = [n for n in g2.nodes()] nb1 = nx.number_of_nodes(g1) nb2 = nx.number_of_nodes(g2) pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward] pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward] return dis, pi_forward, pi_backward def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP', edit_cost_constant=[], stabilizer='min', repeat=50): """ Compute GEDs for a group of graphs. """ if lib == 'gedlibpy': def convertGraph(G): """Convert a graph to the proper NetworkX format that can be recognized by library gedlibpy. """ G_new = nx.Graph() for nd, attrs in G.nodes(data=True): G_new.add_node(str(nd), chem=attrs['atom']) for nd1, nd2, attrs in G.edges(data=True): # G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) G_new.add_edge(str(nd1), str(nd2)) return G_new gedlibpy.restart_env() gedlibpy.add_nx_graph(convertGraph(g1), "") gedlibpy.add_nx_graph(convertGraph(g2), "") listID = gedlibpy.get_all_graph_ids() gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant) gedlibpy.init() gedlibpy.set_method(method, "") gedlibpy.init_method() g = listID[0] h = listID[1] if stabilizer == None: gedlibpy.run_method(g, h) pi_forward = gedlibpy.get_forward_map(g, h) pi_backward = gedlibpy.get_backward_map(g, h) upper = gedlibpy.get_upper_bound(g, h) lower = gedlibpy.get_lower_bound(g, h) elif stabilizer == 'min': upper = np.inf for itr in range(repeat): gedlibpy.run_method(g, h) upper_tmp = gedlibpy.get_upper_bound(g, h) if upper_tmp < upper: upper = upper_tmp pi_forward = gedlibpy.get_forward_map(g, h) pi_backward = gedlibpy.get_backward_map(g, h) lower = gedlibpy.get_lower_bound(g, h) if upper == 0: break dis = upper # make the map label correct (label remove map as np.inf) nodes1 = [n for n in g1.nodes()] nodes2 = [n for n in g2.nodes()] nb1 = nx.number_of_nodes(g1) nb2 = nx.number_of_nodes(g2) pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward] pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward] return dis, pi_forward, pi_backward def ged_median(Gn, Gn_median, measure='ged', verbose=False, ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'): dis_list = [] pi_forward_list = [] for idx, G in tqdm(enumerate(Gn), desc='computing median distances', file=sys.stdout) if verbose else enumerate(Gn): dis_sum = 0 pi_forward_list.append([]) for G_p in Gn_median: dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p, cost=ged_cost, method=ged_method, saveGXL=saveGXL) pi_forward_list[idx].append(pi_tmp_forward) dis_sum += dis_tmp dis_list.append(dis_sum) return dis_list, pi_forward_list def get_nb_edit_operations(g1, g2, forward_map, backward_map): """Compute the number of each edit operations. """ n_vi = 0 n_vr = 0 n_vs = 0 n_ei = 0 n_er = 0 n_es = 0 nodes1 = [n for n in g1.nodes()] for i, map_i in enumerate(forward_map): if map_i == np.inf: n_vr += 1 elif g1.node[nodes1[i]]['atom'] != g2.node[map_i]['atom']: n_vs += 1 for map_i in backward_map: if map_i == np.inf: n_vi += 1 # idx_nodes1 = range(0, len(node1)) edges1 = [e for e in g1.edges()] nb_edges2_cnted = 0 for n1, n2 in edges1: idx1 = nodes1.index(n1) idx2 = nodes1.index(n2) # one of the nodes is removed, thus the edge is removed. if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf: n_er += 1 # corresponding edge is in g2. Edge label is not considered. elif (forward_map[idx1], forward_map[idx2]) in g2.edges() or \ (forward_map[idx2], forward_map[idx1]) in g2.edges(): nb_edges2_cnted += 1 # corresponding nodes are in g2, however the edge is removed. else: n_er += 1 n_ei = nx.number_of_edges(g2) - nb_edges2_cnted return n_vi, n_vr, n_vs, n_ei, n_er, n_es