@@ -49,16 +49,18 @@ def compute_ged(g1, g2, options):
def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True):
def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True):
node_label_costs = options['node_label_costs'] if 'node_label_costs' in options else None
edge_label_costs = options['edge_label_costs'] if 'edge_label_costs' in options else None
# initialize ged env.
# initialize ged env.
ged_env = GEDEnv()
ged_env = GEDEnv()
ged_env.set_edit_cost(options['edit_cost'], edit_cost_constants=options['edit_cost_constants'])
ged_env.set_edit_cost(options['edit_cost'], edit_cost_constants=options['edit_cost_constants'])
for g in graphs:
for g in graphs:
ged_env.add_nx_graph(g, '')
ged_env.add_nx_graph(g, '')
listID = ged_env.get_all_graph_ids()
listID = ged_env.get_all_graph_ids()
node_labels = ged_env.get_all_node_labels()
edge_labels = ged_env.get_all_edge_labels()
node_label_costs = label_costs_to_matrix(options['node_label_costs'], len(node_labels)) if 'node_label_costs' in options else None
edge_label_costs = label_costs_to_matrix(options['edge_label_costs'], len(edge_labels)) if 'edge_label_costs' in options else None
ged_env.set_label_costs(node_label_costs, edge_label_costs)
ged_env.set_label_costs(node_label_costs, edge_label_costs)
ged_env.init(init_type=options['init_option'])
ged_env.init(init_type=options['init_option'])
if parallel:
if parallel:
@@ -69,11 +71,9 @@ def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True
# compute ged.
# compute ged.
# options used to compute numbers of edit operations.
# options used to compute numbers of edit operations.
neo_options = {'edit_cost': options['edit_cost'],
neo_options = {'edit_cost': options['edit_cost'],
# 'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
# 'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs'],
'is_cml': True,
'is_cml': True,
'node_labels': ged_env.get_all_ node_labels() ,
'edge_labels': ged_env.get_all_ edge_labels() }
'node_labels': node_labels,
'edge_labels': edge_labels}
ged_mat = np.zeros((len(graphs), len(graphs)))
ged_mat = np.zeros((len(graphs), len(graphs)))
if parallel:
if parallel:
len_itr = int(len(graphs) * (len(graphs) - 1) / 2)
len_itr = int(len(graphs) * (len(graphs) - 1) / 2)
@@ -243,11 +243,45 @@ def _compute_ged(env, gid1, gid2, g1, g2):
return dis, pi_forward, pi_backward
return dis, pi_forward, pi_backward
def label_costs_to_matrix(costs, nb_labels):
    """Unpack a flat vector of label edit costs into a symmetric matrix.

    Parameters
    ----------
    costs : sequence or numpy.array
        Flat costs for a single kind of label, laid out as ``nb_labels``
        insertion costs, then ``nb_labels`` deletion costs, then the
        substitution costs of every unordered label pair ``(a, b)`` with
        ``a < b``, enumerated row by row. Entries beyond the ones consumed
        here are ignored.
    nb_labels : integer
        Number of real labels (the dummy label is not counted).

    Returns
    -------
    cost_matrix : numpy.array
        Matrix of shape (nb_labels + 1, nb_labels + 1). Index 0 stands for
        the dummy label: row 0 holds the insertion costs, column 0 the
        deletion costs, and the remaining off-diagonal entries the
        substitution costs, mirrored across the diagonal. The diagonal is
        left at zero. The original note states that this is the same
        setting as in GEDData.

    Raises
    ------
    IndexError
        Raised by the indexing itself when ``costs`` is a list or NumPy
        array with fewer entries than the layout above requires.
    """
    size = nb_labels + 1  # One extra slot: index 0 is the dummy label.
    cost_matrix = np.zeros((size, size))

    # Insertions occupy the first nb_labels entries and fill the dummy row.
    for label in range(1, size):
        cost_matrix[0, label] = costs[label - 1]

    # Deletions follow directly and fill the dummy column.
    for label in range(1, size):
        cost_matrix[label, 0] = costs[nb_labels + label - 1]

    # Substitutions come last. np.triu_indices yields the strictly upper
    # triangle in row-major order, i.e. the same (row, col) sequence as a
    # nested "for row / for col > row" loop; shift by one to skip the dummy.
    first_substitution = 2 * nb_labels
    upper_rows, upper_cols = np.triu_indices(nb_labels, k=1)
    for rank, (row, col) in enumerate(zip(upper_rows + 1, upper_cols + 1)):
        value = costs[first_substitution + rank]
        cost_matrix[row, col] = value
        cost_matrix[col, row] = value

    return cost_matrix
def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, is_cml=False, **kwargs):
def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, is_cml=False, **kwargs):
if is_cml:
if is_cml:
if edit_cost == 'CONSTANT':
if edit_cost == 'CONSTANT':
node_label_costs = kwargs.get('node_label_costs')
edge_label_costs = kwargs.get('edge_label_costs')
node_labels = kwargs.get('node_labels', [])
node_labels = kwargs.get('node_labels', [])
edge_labels = kwargs.get('edge_labels', [])
edge_labels = kwargs.get('edge_labels', [])
return get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
return get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
@@ -273,12 +307,12 @@ def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, is
def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
node_labels=[], edge_labels=[]):
node_labels=[], edge_labels=[]):
"""Compute the number of each edit operations for symbolic-labeled graphs, where the costs are different for each pair of nodes.
"""Compute times that edit operations are used in an edit path for symbolic-labeled graphs, where the costs are different for each pair of nodes.
Returns
Returns
-------
-------
list
list
A vector of costs bewteen labels, formed in the order of node insertion costs, node deletion costs, node substitition costs, edge insertion costs, edge deletion costs, edge substitition costs. The dummy label is the first label, and the self label costs are not included.
A vector of numbers of times that costs bewteen labels are used in an edit path , formed in the order of node insertion costs, node deletion costs, node substitition costs, edge insertion costs, edge deletion costs, edge substitition costs. The dummy label is the first label, and the self label costs are not included.
"""
"""
# Initialize.
# Initialize.
nb_ops_node = np.zeros((1 + len(node_labels), 1 + len(node_labels)))
nb_ops_node = np.zeros((1 + len(node_labels), 1 + len(node_labels)))
@@ -290,7 +324,7 @@ def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
label1 = tuple(g1.nodes[nodes1[i]].items()) # @todo: order and faster
label1 = tuple(g1.nodes[nodes1[i]].items()) # @todo: order and faster
idx_label1 = node_labels.index(label1) # @todo: faster
idx_label1 = node_labels.index(label1) # @todo: faster
if map_i == np.inf: # deletions.
if map_i == np.inf: # deletions.
nb_ops_node[0, idx_label1 + 1] += 1
nb_ops_node[idx_label1 + 1, 0 ] += 1
else: # substitutions.
else: # substitutions.
label2 = tuple(g2.nodes[map_i].items())
label2 = tuple(g2.nodes[map_i].items())
if label1 != label2:
if label1 != label2:
@@ -302,7 +336,7 @@ def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
if map_i == np.inf:
if map_i == np.inf:
label = tuple(g2.nodes[nodes2[i]].items())
label = tuple(g2.nodes[nodes2[i]].items())
idx_label = node_labels.index(label) # @todo: faster
idx_label = node_labels.index(label) # @todo: faster
nb_ops_node[idx_label + 1, 0 ] += 1
nb_ops_node[0, idx_label + 1] += 1
# For edges.
# For edges.
edges1 = [e for e in g1.edges()]
edges1 = [e for e in g1.edges()]
@@ -314,7 +348,7 @@ def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
idxt1 = nodes1.index(nt1) # @todo: faster
idxt1 = nodes1.index(nt1) # @todo: faster
# At least one of the nodes is removed, thus the edge is removed.
# At least one of the nodes is removed, thus the edge is removed.
if forward_map[idxf1] == np.inf or forward_map[idxt1] == np.inf:
if forward_map[idxf1] == np.inf or forward_map[idxt1] == np.inf:
nb_ops_edge[0, idx_label1 + 1] += 1
nb_ops_edge[idx_label1 + 1, 0 ] += 1
# corresponding edge is in g2.
# corresponding edge is in g2.
else:
else:
nf2, nt2 = forward_map[idxf1], forward_map[idxt1]
nf2, nt2 = forward_map[idxf1], forward_map[idxt1]
@@ -335,38 +369,38 @@ def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
nb_ops_edge[idx_label1 + 1, idx_label2 + 1] += 1
nb_ops_edge[idx_label1 + 1, idx_label2 + 1] += 1
# Corresponding nodes are in g2, however the edge is removed.
# Corresponding nodes are in g2, however the edge is removed.
else:
else:
nb_ops_edge[0, idx_label1 + 1] += 1
nb_ops_edge[idx_label1 + 1, 0 ] += 1
# insertions.
# insertions.
for e in g2.edges():
if e not in edges2_marked:
label = tuple(g2.edges[e ].items())
for nt, nf in g2.edges():
if (nt, nf) not in edges2_marked and (nf, nt) not in edges2_marked: # @todo: for directed.
label = tuple(g2.edges[(nt, nf) ].items())
idx_label = edge_labels.index(label) # @todo: faster
idx_label = edge_labels.index(label) # @todo: faster
nb_ops_edge[idx_label + 1, 0 ] += 1
nb_ops_edge[0, idx_label + 1] += 1
# Reform the cost s into a vector.
cost _vector = []
# Add node insertion costs .
# Reform the numbers of edit oepration s into a vector.
nb_eo _vector = []
# node insertion.
for i in range(1, len(nb_ops_node)):
for i in range(1, len(nb_ops_node)):
cost_vector.append(nb_ops_node[i, 0 ])
# Add node deletion costs .
nb_eo_vector.append(nb_ops_node[0, i ])
# node deletion.
for i in range(1, len(nb_ops_node)):
for i in range(1, len(nb_ops_node)):
cost_vector.append(nb_ops_node[0, i ])
# Add node substitution costs .
nb_eo_vector.append(nb_ops_node[i, 0 ])
# node substitution.
for i in range(1, len(nb_ops_node)):
for i in range(1, len(nb_ops_node)):
for j in range(i + 1, len(nb_ops_node)):
for j in range(i + 1, len(nb_ops_node)):
cost _vector.append(nb_ops_node[i, j])
# Add edge insertion costs .
nb_eo _vector.append(nb_ops_node[i, j])
# edge insertion.
for i in range(1, len(nb_ops_edge)):
for i in range(1, len(nb_ops_edge)):
cost_vector.append(nb_ops_edge[i, 0 ])
# Add edge deletion costs .
nb_eo_vector.append(nb_ops_edge[0, i ])
# edge deletion.
for i in range(1, len(nb_ops_edge)):
for i in range(1, len(nb_ops_edge)):
cost_vector.append(nb_ops_edge[0, i ])
# Add edge substitution costs .
nb_eo_vector.append(nb_ops_edge[i, 0 ])
# edge substitution.
for i in range(1, len(nb_ops_edge)):
for i in range(1, len(nb_ops_edge)):
for j in range(i + 1, len(nb_ops_edge)):
for j in range(i + 1, len(nb_ops_edge)):
cost _vector.append(nb_ops_edge[i, j])
nb_eo _vector.append(nb_ops_edge[i, j])
return cost _vector
return nb_eo _vector
def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,