Browse Source

Add pairwise_ged function.

v0.2.x
jajupmochi 4 years ago
parent
commit
95598397a1
2 changed files with 82 additions and 45 deletions
  1. +1
    -1
      gklearn/ged/util/__init__.py
  2. +81
    -44
      gklearn/ged/util/util.py

+ 1
- 1
gklearn/ged/util/__init__.py View File

@@ -1,3 +1,3 @@
from gklearn.ged.util.lsape_solver import LSAPESolver from gklearn.ged.util.lsape_solver import LSAPESolver
from gklearn.ged.util.util import compute_geds, ged_options_to_string
from gklearn.ged.util.util import pairwise_ged, compute_geds, get_nb_edit_operations, ged_options_to_string
from gklearn.ged.util.util import compute_geds_cml, label_costs_to_matrix from gklearn.ged.util.util import compute_geds_cml, label_costs_to_matrix

+ 81
- 44
gklearn/ged/util/util.py View File

@@ -23,7 +23,7 @@ def compute_ged(g1, g2, options):
ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants']) ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants'])
ged_env.add_nx_graph(g1, '') ged_env.add_nx_graph(g1, '')
ged_env.add_nx_graph(g2, '') ged_env.add_nx_graph(g2, '')
listID = ged_env.get_all_graph_ids()
listID = ged_env.get_all_graph_ids()
ged_env.init(init_type=options['init_option']) ged_env.init(init_type=options['init_option'])
ged_env.set_method(options['method'], ged_options_to_string(options)) ged_env.set_method(options['method'], ged_options_to_string(options))
ged_env.init_method() ged_env.init_method()
@@ -33,9 +33,46 @@ def compute_ged(g1, g2, options):
ged_env.run_method(g, h) ged_env.run_method(g, h)
pi_forward = ged_env.get_forward_map(g, h) pi_forward = ged_env.get_forward_map(g, h)
pi_backward = ged_env.get_backward_map(g, h) pi_backward = ged_env.get_backward_map(g, h)
upper = ged_env.get_upper_bound(g, h)
upper = ged_env.get_upper_bound(g, h)
dis = upper dis = upper

# make the map labels correct (entries with no matching node are set to np.inf)
nodes1 = [n for n in g1.nodes()]
nodes2 = [n for n in g2.nodes()]
nb1 = nx.number_of_nodes(g1)
nb2 = nx.number_of_nodes(g2)
pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
# print(pi_forward)

return dis, pi_forward, pi_backward


def pairwise_ged(g1, g2, options={}, sort=True, repeats=1, parallel=False, verbose=True):
from gklearn.gedlib import librariesImport, gedlibpy

ged_env = gedlibpy.GEDEnv()
ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants'])
ged_env.add_nx_graph(g1, '')
ged_env.add_nx_graph(g2, '')
listID = ged_env.get_all_graph_ids()
ged_env.init(init_option=(options['init_option'] if 'init_option' in options else 'EAGER_WITHOUT_SHUFFLED_COPIES'))
ged_env.set_method(options['method'], ged_options_to_string(options))
ged_env.init_method()

g = listID[0]
h = listID[1]
dis_min = np.inf
for i in range(0, repeats):
ged_env.run_method(g, h)
upper = ged_env.get_upper_bound(g, h)
dis = upper
if dis < dis_min:
dis_min = dis
pi_forward = ged_env.get_forward_map(g, h)
pi_backward = ged_env.get_backward_map(g, h)
# lower = ged_env.get_lower_bound(g, h)

# make the map label correct (label remove map as np.inf) # make the map label correct (label remove map as np.inf)
nodes1 = [n for n in g1.nodes()] nodes1 = [n for n in g1.nodes()]
nodes2 = [n for n in g2.nodes()] nodes2 = [n for n in g2.nodes()]
@@ -56,7 +93,7 @@ def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True
for g in graphs: for g in graphs:
ged_env.add_nx_graph(g, '') ged_env.add_nx_graph(g, '')
listID = ged_env.get_all_graph_ids() listID = ged_env.get_all_graph_ids()
node_labels = ged_env.get_all_node_labels() node_labels = ged_env.get_all_node_labels()
edge_labels = ged_env.get_all_edge_labels() edge_labels = ged_env.get_all_edge_labels()
node_label_costs = label_costs_to_matrix(options['node_label_costs'], len(node_labels)) if 'node_label_costs' in options else None node_label_costs = label_costs_to_matrix(options['node_label_costs'], len(node_labels)) if 'node_label_costs' in options else None
@@ -73,7 +110,7 @@ def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True
if node_label_costs is None and edge_label_costs is None: if node_label_costs is None and edge_label_costs is None:
neo_options = {'edit_cost': options['edit_cost'], neo_options = {'edit_cost': options['edit_cost'],
'is_cml': False, 'is_cml': False,
'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']} 'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
else: else:
neo_options = {'edit_cost': options['edit_cost'], neo_options = {'edit_cost': options['edit_cost'],
@@ -114,7 +151,7 @@ def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True
# print(i, j, idx_itr, dis) # print(i, j, idx_itr, dis)
pool.close() pool.close()
pool.join() pool.join()
else: else:
ged_vec = [] ged_vec = []
n_edit_operations = [] n_edit_operations = []
@@ -146,7 +183,7 @@ def compute_geds(graphs, options={}, sort=True, repeats=1, parallel=False, verbo
ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants']) ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants'])
for g in graphs: for g in graphs:
ged_env.add_nx_graph(g, '') ged_env.add_nx_graph(g, '')
listID = ged_env.get_all_graph_ids()
listID = ged_env.get_all_graph_ids()
ged_env.init() ged_env.init()
if parallel: if parallel:
options['threads'] = 1 options['threads'] = 1
@@ -155,7 +192,7 @@ def compute_geds(graphs, options={}, sort=True, repeats=1, parallel=False, verbo


# compute ged. # compute ged.
neo_options = {'edit_cost': options['edit_cost'], neo_options = {'edit_cost': options['edit_cost'],
'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']} 'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
ged_mat = np.zeros((len(graphs), len(graphs))) ged_mat = np.zeros((len(graphs), len(graphs)))
if parallel: if parallel:
@@ -191,7 +228,7 @@ def compute_geds(graphs, options={}, sort=True, repeats=1, parallel=False, verbo
# print(i, j, idx_itr, dis) # print(i, j, idx_itr, dis)
pool.close() pool.close()
pool.join() pool.join()
else: else:
ged_vec = [] ged_vec = []
n_edit_operations = [] n_edit_operations = []
@@ -232,14 +269,14 @@ def _compute_ged_parallel(env, gid1, gid2, g1, g2, options, sort, repeats):




def _compute_ged(env, gid1, gid2, g1, g2, repeats): def _compute_ged(env, gid1, gid2, g1, g2, repeats):
dis_min = np.inf
dis_min = np.inf # @todo: maybe compare distance and then do others (faster).
for i in range(0, repeats): for i in range(0, repeats):
env.run_method(gid1, gid2) env.run_method(gid1, gid2)
pi_forward = env.get_forward_map(gid1, gid2) pi_forward = env.get_forward_map(gid1, gid2)
pi_backward = env.get_backward_map(gid1, gid2) pi_backward = env.get_backward_map(gid1, gid2)
upper = env.get_upper_bound(gid1, gid2)
upper = env.get_upper_bound(gid1, gid2)
dis = upper dis = upper
# make the map label correct (label remove map as np.inf) # make the map label correct (label remove map as np.inf)
nodes1 = [n for n in g1.nodes()] nodes1 = [n for n in g1.nodes()]
nodes2 = [n for n in g2.nodes()] nodes2 = [n for n in g2.nodes()]
@@ -247,7 +284,7 @@ def _compute_ged(env, gid1, gid2, g1, g2, repeats):
nb2 = nx.number_of_nodes(g2) nb2 = nx.number_of_nodes(g2)
pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward] pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward] pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
if dis < dis_min: if dis < dis_min:
dis_min = dis dis_min = dis
pi_forward_min = pi_forward pi_forward_min = pi_forward
@@ -268,7 +305,7 @@ def label_costs_to_matrix(costs, nb_labels):


Returns Returns
------- -------
cost_matrix : numpy.array.
cost_matrix : numpy.array.
The reformed label cost matrix of size (nb_labels, nb_labels). Each row/column of cost_matrix corresponds to a label, and the first label is the dummy label. This is the same setting as in GEDData. The reformed label cost matrix of size (nb_labels, nb_labels). Each row/column of cost_matrix corresponds to a label, and the first label is the dummy label. This is the same setting as in GEDData.
""" """
# Initialize label cost matrix. # Initialize label cost matrix.
@@ -282,13 +319,13 @@ def label_costs_to_matrix(costs, nb_labels):
for row in range(1, nb_labels + 1): for row in range(1, nb_labels + 1):
cost_matrix[row, 0] = costs[i] cost_matrix[row, 0] = costs[i]
i += 1 i += 1
# Costs of substitutions.
# Costs of substitutions.
for row in range(1, nb_labels + 1): for row in range(1, nb_labels + 1):
for col in range(row + 1, nb_labels + 1): for col in range(row + 1, nb_labels + 1):
cost_matrix[row, col] = costs[i] cost_matrix[row, col] = costs[i]
cost_matrix[col, row] = costs[i] cost_matrix[col, row] = costs[i]
i += 1 i += 1
return cost_matrix return cost_matrix




@@ -299,7 +336,7 @@ def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, is
edge_labels = kwargs.get('edge_labels', []) edge_labels = kwargs.get('edge_labels', [])
return get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map, return get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
node_labels=node_labels, edge_labels=edge_labels) node_labels=node_labels, edge_labels=edge_labels)
else:
else:
raise Exception('Edit cost "', edit_cost, '" is not supported.') raise Exception('Edit cost "', edit_cost, '" is not supported.')
else: else:
if edit_cost == 'LETTER' or edit_cost == 'LETTER2': if edit_cost == 'LETTER' or edit_cost == 'LETTER2':
@@ -307,21 +344,21 @@ def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, is
elif edit_cost == 'NON_SYMBOLIC': elif edit_cost == 'NON_SYMBOLIC':
node_attrs = kwargs.get('node_attrs', []) node_attrs = kwargs.get('node_attrs', [])
edge_attrs = kwargs.get('edge_attrs', []) edge_attrs = kwargs.get('edge_attrs', [])
return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
node_attrs=node_attrs, edge_attrs=edge_attrs) node_attrs=node_attrs, edge_attrs=edge_attrs)
elif edit_cost == 'CONSTANT': elif edit_cost == 'CONSTANT':
node_labels = kwargs.get('node_labels', []) node_labels = kwargs.get('node_labels', [])
edge_labels = kwargs.get('edge_labels', []) edge_labels = kwargs.get('edge_labels', [])
return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
node_labels=node_labels, edge_labels=edge_labels) node_labels=node_labels, edge_labels=edge_labels)
else:
return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map)
def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
else:
return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map)
def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
node_labels=[], edge_labels=[]): node_labels=[], edge_labels=[]):
"""Compute times that edit operations are used in an edit path for symbolic-labeled graphs, where the costs are different for each pair of nodes. """Compute times that edit operations are used in an edit path for symbolic-labeled graphs, where the costs are different for each pair of nodes.
Returns Returns
------- -------
list list
@@ -330,7 +367,7 @@ def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
# Initialize. # Initialize.
nb_ops_node = np.zeros((1 + len(node_labels), 1 + len(node_labels))) nb_ops_node = np.zeros((1 + len(node_labels), 1 + len(node_labels)))
nb_ops_edge = np.zeros((1 + len(edge_labels), 1 + len(edge_labels))) nb_ops_edge = np.zeros((1 + len(edge_labels), 1 + len(edge_labels)))
# For nodes. # For nodes.
nodes1 = [n for n in g1.nodes()] nodes1 = [n for n in g1.nodes()]
for i, map_i in enumerate(forward_map): for i, map_i in enumerate(forward_map):
@@ -350,7 +387,7 @@ def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
label = tuple(g2.nodes[nodes2[i]].items()) label = tuple(g2.nodes[nodes2[i]].items())
idx_label = node_labels.index(label) # @todo: faster idx_label = node_labels.index(label) # @todo: faster
nb_ops_node[0, idx_label + 1] += 1 nb_ops_node[0, idx_label + 1] += 1
# For edges. # For edges.
edges1 = [e for e in g1.edges()] edges1 = [e for e in g1.edges()]
edges2_marked = [] edges2_marked = []
@@ -371,7 +408,7 @@ def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
label2 = tuple(g2.edges[(nf2, nt2)].items()) label2 = tuple(g2.edges[(nf2, nt2)].items())
if label1 != label2: if label1 != label2:
idx_label2 = edge_labels.index(label2) # @todo: faster idx_label2 = edge_labels.index(label2) # @todo: faster
nb_ops_edge[idx_label1 + 1, idx_label2 + 1] += 1
nb_ops_edge[idx_label1 + 1, idx_label2 + 1] += 1
# Switch nf2 and nt2, for directed graphs. # Switch nf2 and nt2, for directed graphs.
elif (nt2, nf2) in g2.edges(): elif (nt2, nf2) in g2.edges():
edges2_marked.append((nt2, nf2)) edges2_marked.append((nt2, nf2))
@@ -389,7 +426,7 @@ def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
label = tuple(g2.edges[(nt, nf)].items()) label = tuple(g2.edges[(nt, nf)].items())
idx_label = edge_labels.index(label) # @todo: faster idx_label = edge_labels.index(label) # @todo: faster
nb_ops_edge[0, idx_label + 1] += 1 nb_ops_edge[0, idx_label + 1] += 1
# Reform the numbers of edit operations into a vector. # Reform the numbers of edit operations into a vector.
nb_eo_vector = [] nb_eo_vector = []
# node insertion. # node insertion.
@@ -412,9 +449,9 @@ def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
for i in range(1, len(nb_ops_edge)): for i in range(1, len(nb_ops_edge)):
for j in range(i + 1, len(nb_ops_edge)): for j in range(i + 1, len(nb_ops_edge)):
nb_eo_vector.append(nb_ops_edge[i, j]) nb_eo_vector.append(nb_ops_edge[i, j])
return nb_eo_vector return nb_eo_vector


def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map, def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
node_labels=[], edge_labels=[]): node_labels=[], edge_labels=[]):
@@ -426,7 +463,7 @@ def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
n_ei = 0 n_ei = 0
n_er = 0 n_er = 0
n_es = 0 n_es = 0
nodes1 = [n for n in g1.nodes()] nodes1 = [n for n in g1.nodes()]
for i, map_i in enumerate(forward_map): for i, map_i in enumerate(forward_map):
if map_i == np.inf: if map_i == np.inf:
@@ -441,9 +478,9 @@ def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
for map_i in backward_map: for map_i in backward_map:
if map_i == np.inf: if map_i == np.inf:
n_vi += 1 n_vi += 1
# idx_nodes1 = range(0, len(node1)) # idx_nodes1 = range(0, len(node1))
edges1 = [e for e in g1.edges()] edges1 = [e for e in g1.edges()]
nb_edges2_cnted = 0 nb_edges2_cnted = 0
for n1, n2 in edges1: for n1, n2 in edges1:
@@ -475,7 +512,7 @@ def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
else: else:
n_er += 1 n_er += 1
n_ei = nx.number_of_edges(g2) - nb_edges2_cnted n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
return n_vi, n_vr, n_vs, n_ei, n_er, n_es return n_vi, n_vr, n_vs, n_ei, n_er, n_es




@@ -488,7 +525,7 @@ def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
sod_vs = 0 sod_vs = 0
n_ei = 0 n_ei = 0
n_er = 0 n_er = 0
nodes1 = [n for n in g1.nodes()] nodes1 = [n for n in g1.nodes()]
for i, map_i in enumerate(forward_map): for i, map_i in enumerate(forward_map):
if map_i == np.inf: if map_i == np.inf:
@@ -501,9 +538,9 @@ def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
for map_i in backward_map: for map_i in backward_map:
if map_i == np.inf: if map_i == np.inf:
n_vi += 1 n_vi += 1
# idx_nodes1 = range(0, len(node1)) # idx_nodes1 = range(0, len(node1))
edges1 = [e for e in g1.edges()] edges1 = [e for e in g1.edges()]
nb_edges2_cnted = 0 nb_edges2_cnted = 0
for n1, n2 in edges1: for n1, n2 in edges1:
@@ -520,7 +557,7 @@ def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
else: else:
n_er += 1 n_er += 1
n_ei = nx.number_of_edges(g2) - nb_edges2_cnted n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er




@@ -536,7 +573,7 @@ def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
n_er = 0 n_er = 0
n_es = 0 n_es = 0
sod_es = 0 sod_es = 0
nodes1 = [n for n in g1.nodes()] nodes1 = [n for n in g1.nodes()]
for i, map_i in enumerate(forward_map): for i, map_i in enumerate(forward_map):
if map_i == np.inf: if map_i == np.inf:
@@ -551,9 +588,9 @@ def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
for map_i in backward_map: for map_i in backward_map:
if map_i == np.inf: if map_i == np.inf:
n_vi += 1 n_vi += 1
# idx_nodes1 = range(0, len(node1)) # idx_nodes1 = range(0, len(node1))
edges1 = [e for e in g1.edges()] edges1 = [e for e in g1.edges()]
for n1, n2 in edges1: for n1, n2 in edges1:
idx1 = nodes1.index(n1) idx1 = nodes1.index(n1)
@@ -582,7 +619,7 @@ def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
else: else:
n_er += 1 n_er += 1
n_ei = nx.number_of_edges(g2) - n_es n_ei = nx.number_of_edges(g2) - n_es
return n_vi, n_vr, sod_vs, n_ei, n_er, sod_es return n_vi, n_vr, sod_vs, n_ei, n_er, sod_es




@@ -615,7 +652,7 @@ def ged_options_to_string(options):
opt_str += '--log ' + str(val) + ' ' opt_str += '--log ' + str(val) + ' '
elif key == 'randomness': elif key == 'randomness':
opt_str += '--randomness ' + str(val) + ' ' opt_str += '--randomness ' + str(val) + ' '
# if not isinstance(val, list): # if not isinstance(val, list):
# opt_str += '--' + key.replace('_', '-') + ' ' # opt_str += '--' + key.replace('_', '-') + ' '
# if val == False: # if val == False:


Loading…
Cancel
Save