Browse Source

Update the computation of numbers of edit operations for GEDs using pre-defined costs between labels.

v0.2.x
jajupmochi 5 years ago
parent
commit
15945eb272
2 changed files with 158 additions and 21 deletions
  1. +22
    -0
      gklearn/ged/env/ged_env.py
  2. +136
    -21
      gklearn/ged/util/util.py

+ 22
- 0
gklearn/ged/env/ged_env.py View File

@@ -357,6 +357,17 @@ class GEDEnv(object):
return len(self.__ged_data._node_labels)
def get_all_node_labels(self):
"""
/*!
* @brief Returns the list of all node labels.
* @return List of pairwise different node labels contained in the environment.
* @note If @p 1 is returned, the nodes are unlabeled.
*/
"""
return self.__ged_data._node_labels
def get_node_label(self, label_id, to_dict=True):
"""
/*!
@@ -382,6 +393,17 @@ class GEDEnv(object):
"""
return len(self.__ged_data._edge_labels)
def get_all_edge_labels(self):
"""
/*!
* @brief Returns the list of all edge labels.
* @return List of pairwise different edge labels contained in the environment.
* @note If @p 1 is returned, the edges are unlabeled.
*/
"""
return self.__ged_data._edge_labels
def get_edge_label(self, label_id, to_dict=True):
"""


+ 136
- 21
gklearn/ged/util/util.py View File

@@ -49,14 +49,17 @@ def compute_ged(g1, g2, options):


def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True):
node_label_costs = options['node_label_costs'] if 'node_label_costs' in options else None
edge_label_costs = options['edge_label_costs'] if 'edge_label_costs' in options else None

# initialize ged env.
ged_env = GEDEnv()
ged_env.set_edit_cost(options['edit_cost'], edit_cost_constants=options['edit_cost_constants'])
for g in graphs:
ged_env.add_nx_graph(g, '')
listID = ged_env.get_all_graph_ids()
ged_env.set_label_costs(options['node_label_costs'] if 'node_label_costs' in options else None,
options['edge_label_costs'] if 'edge_label_costs' in options else None)
ged_env.set_label_costs(node_label_costs, edge_label_costs)
ged_env.init(init_type=options['init_option'])
if parallel:
options['threads'] = 1
@@ -64,9 +67,13 @@ def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True
ged_env.init_method()

# compute ged.
# options used to compute numbers of edit operations.
neo_options = {'edit_cost': options['edit_cost'],
'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
# 'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
# 'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs'],
'is_cml': True,
'node_labels': ged_env.get_all_node_labels(),
'edge_labels': ged_env.get_all_edge_labels()}
ged_mat = np.zeros((len(graphs), len(graphs)))
if parallel:
len_itr = int(len(graphs) * (len(graphs) - 1) / 2)
@@ -122,8 +129,7 @@ def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True
n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
n_edit_operations.append(n_eo_tmp)

return ged_vec, ged_mat, n_edit_operations

return ged_vec, ged_mat, n_edit_operations


def compute_geds(graphs, options={}, sort=True, parallel=False, verbose=True):
@@ -237,21 +243,130 @@ def _compute_ged(env, gid1, gid2, g1, g2):
return dis, pi_forward, pi_backward


def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, **kwargs):
if edit_cost == 'LETTER' or edit_cost == 'LETTER2':
return get_nb_edit_operations_letter(g1, g2, forward_map, backward_map)
elif edit_cost == 'NON_SYMBOLIC':
node_attrs = kwargs.get('node_attrs', [])
edge_attrs = kwargs.get('edge_attrs', [])
return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
node_attrs=node_attrs, edge_attrs=edge_attrs)
elif edit_cost == 'CONSTANT':
node_labels = kwargs.get('node_labels', [])
edge_labels = kwargs.get('edge_labels', [])
return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
node_labels=node_labels, edge_labels=edge_labels)
else:
return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map)
def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, is_cml=False, **kwargs):
if is_cml:
if edit_cost == 'CONSTANT':
node_label_costs = kwargs.get('node_label_costs')
edge_label_costs = kwargs.get('edge_label_costs')
node_labels = kwargs.get('node_labels', [])
edge_labels = kwargs.get('edge_labels', [])
return get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
node_labels=node_labels, edge_labels=edge_labels)
else:
raise Exception('Edit cost "', edit_cost, '" is not supported.')
else:
if edit_cost == 'LETTER' or edit_cost == 'LETTER2':
return get_nb_edit_operations_letter(g1, g2, forward_map, backward_map)
elif edit_cost == 'NON_SYMBOLIC':
node_attrs = kwargs.get('node_attrs', [])
edge_attrs = kwargs.get('edge_attrs', [])
return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
node_attrs=node_attrs, edge_attrs=edge_attrs)
elif edit_cost == 'CONSTANT':
node_labels = kwargs.get('node_labels', [])
edge_labels = kwargs.get('edge_labels', [])
return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
node_labels=node_labels, edge_labels=edge_labels)
else:
return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map)
def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
node_labels=[], edge_labels=[]):
"""Compute the number of each edit operations for symbolic-labeled graphs, where the costs are different for each pair of nodes.
Returns
-------
list
A vector of costs bewteen labels, formed in the order of node insertion costs, node deletion costs, node substitition costs, edge insertion costs, edge deletion costs, edge substitition costs. The dummy label is the first label, and the self label costs are not included.
"""
# Initialize.
nb_ops_node = np.zeros((1 + len(node_labels), 1 + len(node_labels)))
nb_ops_edge = np.zeros((1 + len(edge_labels), 1 + len(edge_labels)))
# For nodes.
nodes1 = [n for n in g1.nodes()]
for i, map_i in enumerate(forward_map):
label1 = tuple(g1.nodes[nodes1[i]].items()) # @todo: order and faster
idx_label1 = node_labels.index(label1) # @todo: faster
if map_i == np.inf: # deletions.
nb_ops_node[0, idx_label1 + 1] += 1
else: # substitutions.
label2 = tuple(g2.nodes[map_i].items())
if label1 != label2:
idx_label2 = node_labels.index(label2) # @todo: faster
nb_ops_node[idx_label1 + 1, idx_label2 + 1] += 1
# insertions.
nodes2 = [n for n in g2.nodes()]
for i, map_i in enumerate(backward_map):
if map_i == np.inf:
label = tuple(g2.nodes[nodes2[i]].items())
idx_label = node_labels.index(label) # @todo: faster
nb_ops_node[idx_label + 1, 0] += 1
# For edges.
edges1 = [e for e in g1.edges()]
edges2_marked = []
for nf1, nt1 in edges1:
label1 = tuple(g1.edges[(nf1, nt1)].items())
idx_label1 = edge_labels.index(label1) # @todo: faster
idxf1 = nodes1.index(nf1) # @todo: faster
idxt1 = nodes1.index(nt1) # @todo: faster
# At least one of the nodes is removed, thus the edge is removed.
if forward_map[idxf1] == np.inf or forward_map[idxt1] == np.inf:
nb_ops_edge[0, idx_label1 + 1] += 1
# corresponding edge is in g2.
else:
nf2, nt2 = forward_map[idxf1], forward_map[idxt1]
if (nf2, nt2) in g2.edges():
edges2_marked.append((nf2, nt2))
# If edge labels are different.
label2 = tuple(g2.edges[(nf2, nt2)].items())
if label1 != label2:
idx_label2 = edge_labels.index(label2) # @todo: faster
nb_ops_edge[idx_label1 + 1, idx_label2 + 1] += 1
# Switch nf2 and nt2, for directed graphs.
elif (nt2, nf2) in g2.edges():
edges2_marked.append((nt2, nf2))
# If edge labels are different.
label2 = tuple(g2.edges[(nt2, nf2)].items())
if label1 != label2:
idx_label2 = edge_labels.index(label2) # @todo: faster
nb_ops_edge[idx_label1 + 1, idx_label2 + 1] += 1
# Corresponding nodes are in g2, however the edge is removed.
else:
nb_ops_edge[0, idx_label1 + 1] += 1
# insertions.
for e in g2.edges():
if e not in edges2_marked:
label = tuple(g2.edges[e].items())
idx_label = edge_labels.index(label) # @todo: faster
nb_ops_edge[idx_label + 1, 0] += 1
# Reform the costs into a vector.
cost_vector = []
# Add node insertion costs.
for i in range(1, len(nb_ops_node)):
cost_vector.append(nb_ops_node[i, 0])
# Add node deletion costs.
for i in range(1, len(nb_ops_node)):
cost_vector.append(nb_ops_node[0, i])
# Add node substitution costs.
for i in range(1, len(nb_ops_node)):
for j in range(i + 1, len(nb_ops_node)):
cost_vector.append(nb_ops_node[i, j])
# Add edge insertion costs.
for i in range(1, len(nb_ops_edge)):
cost_vector.append(nb_ops_edge[i, 0])
# Add edge deletion costs.
for i in range(1, len(nb_ops_edge)):
cost_vector.append(nb_ops_edge[0, i])
# Add edge substitution costs.
for i in range(1, len(nb_ops_edge)):
for j in range(i + 1, len(nb_ops_edge)):
cost_vector.append(nb_ops_edge[i, j])
return cost_vector

def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,


Loading…
Cancel
Save