|
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Fri Apr 26 11:49:12 2019
-
- Iterative alternate minimizations using GED.
- @author: ljia
- """
- import numpy as np
- import random
- import networkx as nx
-
- import sys
- #from Cython_GedLib_2 import librariesImport, script
- import librariesImport, script
- sys.path.insert(0, "../")
- from pygraph.utils.graphfiles import saveDataset
- from pygraph.utils.graphdataset import get_dataset_attributes
-
-
def _has_mapped_edge(g, pi, nd1, nd2):
    """Tell whether the images of nd1 and nd2 under node map pi are adjacent in g."""
    pi_i, pi_j = pi[nd1], pi[nd2]
    return g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j)


def _pick_most_frequent(labels, counts):
    """Return (label, count) for a label with the highest count.

    `labels` and `counts` are parallel sequences; ties are broken at random.
    """
    best = max(counts)
    candidates = [lb for lb, cnt in zip(labels, counts) if cnt == best]
    return random.choice(candidates), best


def iam(Gn, node_label='atom', edge_label='bond_type'):
    """Estimate a median graph of Gn by iterative alternate minimization.

    Phase 1 selects the set-median: the graph of Gn whose summed GED() distance
    to every graph of Gn is smallest. The node maps returned by GED() for that
    graph are kept. Phase 2 then repeats a fixed number of update rounds in
    which, using those node maps, every node label (or numeric attribute
    vector) and every edge of the current graph is re-estimated from the
    corresponding nodes/edges of the graphs in Gn.

    Parameters
    ----------
    Gn : list of networkx graphs
        Input graphs. They are relabeled to consecutive integers internally;
        the caller's graphs are not modified.
    node_label : str
        Name of the node attribute holding the symbolic node label.
    edge_label : str
        Name of the edge attribute holding the symbolic edge label.

    Returns
    -------
    networkx graph
        The updated copy of the set-median graph.

    Raises
    ------
    ValueError
        If Gn is empty.

    Notes
    -----
    NOTE(review): the node maps are computed once in phase 1 and never
    recomputed, and only edges already present in the current graph are
    visited, so edges can be removed or relabeled but never inserted.
    """
    if not Gn:
        raise ValueError('iam() needs at least one graph in Gn.')
    Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]

    # presumably the edge removal / substitution / insertion costs used in the
    # edge-update thresholds below -- TODO confirm against the reference paper.
    c_er = 1
    c_es = 1
    c_ei = 1
    n_itrs = 10  # number of update rounds in phase 2.

    # phase 1: initialize.
    # compute set-median.
    dis_min = np.inf
    pi_all = []
    for idx1, G_p in enumerate(Gn):
        dist_sum = 0
        pi_row = []
        for G_p_prime in Gn:
            dist_tmp, pi_tmp = GED(G_p, G_p_prime)
            pi_row.append(pi_tmp)
            dist_sum += dist_tmp
        pi_all.append(pi_row)
        if dist_sum < dis_min:
            dis_min = dist_sum
            G = G_p.copy()
            idx_min = idx1
    # pi_p[idx][nd] is the image of node nd of the median in Gn[idx]; a value
    # that is not a node of Gn[idx] is treated as "nd has no image there".
    pi_p = pi_all[idx_min]

    # phase 2: iteration.
    ds_attrs = get_dataset_attributes(Gn, attr_names=['edge_labeled', 'node_attr_dim'],
                                      edge_label=edge_label)
    symbolic_nodes = not ds_attrs['node_attr_dim']
    # The label alphabets depend only on Gn, so build them once instead of
    # once per node / per edge.
    node_labels = list(get_node_labels(Gn, node_label)) if symbolic_nodes else []
    edge_labels = (list(get_edge_labels(Gn, edge_label))
                   if ds_attrs['edge_labeled'] else [])

    for _ in range(n_itrs):
        G_new = G.copy()

        # update vertex labels.
        if symbolic_nodes:  # labels are symbolic
            # With no label in the whole dataset there is nothing to vote on.
            if node_labels:
                for nd in G.nodes():
                    # labels of the images of nd, where an image exists.
                    mapped = [g.nodes[pi[nd]][node_label]
                              for g, pi in zip(Gn, pi_p) if g.has_node(pi[nd])]
                    counts = [mapped.count(label) for label in node_labels]
                    # choose one of the best randomly.
                    best_label, _ = _pick_most_frequent(node_labels, counts)
                    G_new.nodes[nd][node_label] = best_label
        else:  # labels are non-symbolic
            for nd in G.nodes():
                Si_norm = 0
                phi_i_bar = np.zeros(ds_attrs['node_attr_dim'])
                for g, pi in zip(Gn, pi_p):
                    pi_i = pi[nd]
                    if g.has_node(pi_i):
                        Si_norm += 1
                        phi_i_bar += np.array(
                            [float(itm) for itm in g.nodes[pi_i]['attributes']])
                # If nd has no image in any graph there is nothing to average;
                # keep its current attributes rather than dividing by zero.
                if Si_norm:
                    G_new.nodes[nd]['attributes'] = phi_i_bar / Si_norm

        # update edge labels and adjacency matrix.
        if ds_attrs['edge_labeled']:
            for nd1, nd2 in G.edges():
                # labels of the images of edge (nd1, nd2), where they exist.
                mapped = [g.edges[pi[nd1], pi[nd2]][edge_label]
                          for g, pi in zip(Gn, pi_p)
                          if _has_mapped_edge(g, pi, nd1, nd2)]
                counts = [mapped.count(label) for label in edge_labels]
                # choose one of the best randomly.
                best_label, h_ij0_max = _pick_most_frequent(edge_labels, counts)

                # check whether a_ij is 0 or 1.
                sij_norm = len(mapped)
                if h_ij0_max > (len(Gn) * c_er / c_es
                                + sij_norm * (1 - (c_er + c_ei) / c_es)):
                    if not G_new.has_edge(nd1, nd2):
                        G_new.add_edge(nd1, nd2)
                    # the kept edge takes the winning label.
                    G_new.edges[nd1, nd2][edge_label] = best_label
                elif G_new.has_edge(nd1, nd2):
                    G_new.remove_edge(nd1, nd2)
        else:  # if edges are unlabeled
            for nd1, nd2 in G.edges():
                sij_norm = sum(1 for g, pi in zip(Gn, pi_p)
                               if _has_mapped_edge(g, pi, nd1, nd2))
                if sij_norm > len(Gn) * c_er / (c_er + c_ei):
                    if not G_new.has_edge(nd1, nd2):
                        G_new.add_edge(nd1, nd2)
                elif G_new.has_edge(nd1, nd2):
                    G_new.remove_edge(nd1, nd2)

        G = G_new.copy()

    return G
-
-
def GED(g1, g2, lib='gedlib'):
    """Estimate the graph edit distance between g1 and g2 and return a node map.

    The two graphs are written to disk with saveDataset() and handed to the
    `script` bindings, which run the "BIPARTITE" method with the "CHEM_1"
    edit cost.

    Parameters
    ----------
    g1, g2 : graphs accepted by saveDataset()
        The pair of graphs to compare.
    lib : str
        Backend to use. Only 'gedlib' is implemented.

    Returns
    -------
    (dis, pi)
        `dis` is the sum of the upper and lower bounds reported by the
        backend; `pi` is the first entry of the list returned by
        script.PyGetAllMap().

    Raises
    ------
    ValueError
        If `lib` is not a supported backend.
    """
    # Fail early with a clear message instead of reaching the return
    # statement with `dis` and `pi` unbound.
    if lib != 'gedlib':
        raise ValueError("Unsupported GED library: %r (only 'gedlib' is "
                         "implemented)." % (lib,))

    # The backend reads its input from files, so dump the pair first.
    saveDataset([g1, g2], [None, None], group='xml', filename='ged_tmp/tmp')
    script.appel()
    script.PyRestartEnv()
    script.PyLoadGXLGraph('ged_tmp/', 'collections/tmp.xml')
    listID = script.PyGetGraphIds()
    script.PySetEditCost("CHEM_1")
    script.PyInitEnv()
    script.PySetMethod("BIPARTITE", "")
    script.PyInitMethod()
    # presumably the ids follow the order the graphs were saved in -- verify.
    g = listID[0]
    h = listID[1]
    script.PyRunMethod(g, h)
    liste = script.PyGetAllMap(g, h)
    upper = script.PyGetUpperBound(g, h)
    lower = script.PyGetLowerBound(g, h)
    # NOTE(review): the sum of the two bounds is not itself a distance
    # estimate (the midpoint or the upper bound would be); kept unchanged
    # because callers may rely on the current scale -- confirm intent.
    dis = upper + lower
    pi = liste[0]

    return dis, pi
-
-
def get_node_labels(Gn, node_label):
    """Return the set of distinct `node_label` attribute values found on the
    nodes of the graphs in Gn.
    """
    labels = set()
    for graph in Gn:
        # nodes lacking the attribute are simply absent from the mapping.
        labels.update(nx.get_node_attributes(graph, node_label).values())
    return labels
-
-
def get_edge_labels(Gn, edge_label):
    """Return the set of distinct `edge_label` attribute values found on the
    edges of the graphs in Gn.
    """
    labels = set()
    for graph in Gn:
        # edges lacking the attribute are simply absent from the mapping.
        labels.update(nx.get_edge_attributes(graph, edge_label).values())
    return labels
-
-
if __name__ == '__main__':
    # Script entry point: load one dataset and run iam() on all its graphs.
    from pygraph.utils.graphfiles import loadDataset

    # Dataset with symbolic node and edge labels.
    ds = dict(
        name='MUTAG',
        dataset='../datasets/MUTAG/MUTAG.mat',
        extra_params={'am_sp_al_nl_el': [0, 0, 3, 1, 2]},
    )
    # Alternative with non-symbolic node labels:
    # ds = dict(name='Letter-high',
    #           dataset='../datasets/Letter-high/Letter-high_A.txt',
    #           extra_params={})
    # Another alternative:
    # ds = dict(name='Acyclic',
    #           dataset='../datasets/monoterpenoides/trainset_9.ds',
    #           extra_params={})
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

    iam(Gn)
|