You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

iam.py 29 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Fri Apr 26 11:49:12 2019
  5. Iterative alternate minimizations using GED.
  6. @author: ljia
  7. """
  8. import numpy as np
  9. import random
  10. import networkx as nx
  11. from tqdm import tqdm
  12. import sys
  13. #from Cython_GedLib_2 import librariesImport, script
  14. import librariesImport, script
  15. sys.path.insert(0, "../")
  16. from pygraph.utils.graphfiles import saveDataset
  17. from pygraph.utils.graphdataset import get_dataset_attributes
  18. from pygraph.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels
  19. #from pygraph.utils.utils import graph_deepcopy
def iam(Gn, c_ei=3, c_er=3, c_es=1, node_label='atom', edge_label='bond_type',
        connected=True):
    """Compute an (approximate) median graph of ``Gn`` by Iterative
    Alternate Minimization.

    Phase 1 chooses the set-median: the graph of ``Gn`` with the smallest
    sum of GEDs to all graphs in ``Gn``.  Phase 2 repeatedly (10 fixed
    iterations) alternates between updating node labels/attributes and
    updating the edge set/labels of the current candidate, guided by the
    node maps returned by :func:`GED`.

    Parameters
    ----------
    Gn : list of networkx.Graph
        Graphs to compute the median of.  Node ids are relabeled to
        ``0..n-1`` internally.
    c_ei, c_er, c_es : float
        Edit costs for edge insertion, removal and substitution.
    node_label, edge_label : str
        Attribute names of the symbolic node / edge labels.
    connected : bool
        # NOTE(review): accepted but never read in this function — confirm intent.

    Returns
    -------
    networkx.Graph
        The final candidate median graph.
    """
#    Gn = Gn[0:10]
    # relabel nodes to 0..n-1 so node ids can index the GED node maps.
    Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]
    # phase 1: initilize.
    # compute set-median.
    dis_min = np.inf
    pi_p = []
    pi_all = []
    for idx1, G_p in enumerate(Gn):
        dist_sum = 0
        pi_all.append([])
        for idx2, G_p_prime in enumerate(Gn):
            dist_tmp, pi_tmp, _ = GED(G_p, G_p_prime)
            pi_all[idx1].append(pi_tmp)
            dist_sum += dist_tmp
        if dist_sum < dis_min:
            dis_min = dist_sum
            G = G_p.copy()
            idx_min = idx1
    # list of edit operations (node maps from the set-median to each graph).
    pi_p = pi_all[idx_min]
    # phase 2: iteration.
    ds_attrs = get_dataset_attributes(Gn, attr_names=['edge_labeled', 'node_attr_dim'],
                                      edge_label=edge_label)
    for itr in range(0, 10):  # @todo: the convergence condition?
        G_new = G.copy()
        # update vertex labels.
        # pre-compute h_i0 for each label.
#        for label in get_node_labels(Gn, node_label):
#            print(label)
#        for nd in G.nodes(data=True):
#            pass
        if not ds_attrs['node_attr_dim']:  # labels are symbolic
            for nd, _ in G.nodes(data=True):
                h_i0_list = []
                label_list = []
                for label in get_node_labels(Gn, node_label):
                    # h_i0: number of graphs whose node mapped from nd
                    # carries this label.
                    h_i0 = 0
                    for idx, g in enumerate(Gn):
                        pi_i = pi_p[idx][nd]
                        if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
                            h_i0 += 1
                    h_i0_list.append(h_i0)
                    label_list.append(label)
                # choose one of the best randomly.
                idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
                idx_rdm = random.randint(0, len(idx_max) - 1)
                G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
        else:  # labels are non-symbolic: average the mapped attribute vectors.
            for nd, _ in G.nodes(data=True):
                Si_norm = 0
                phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
                for idx, g in enumerate(Gn):
                    pi_i = pi_p[idx][nd]
                    if g.has_node(pi_i):  # @todo: what if no g has node? phi_i_bar = 0?
                        Si_norm += 1
                        phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
                phi_i_bar /= Si_norm
                G_new.nodes[nd]['attributes'] = phi_i_bar
        # update edge labels and adjacency matrix.
        if ds_attrs['edge_labeled']:
            for nd1, nd2, _ in G.edges(data=True):
                h_ij0_list = []
                label_list = []
                for label in get_edge_labels(Gn, edge_label):
                    # h_ij0: number of graphs where the mapped node pair is
                    # joined by an edge with this label.
                    h_ij0 = 0
                    for idx, g in enumerate(Gn):
                        pi_i = pi_p[idx][nd1]
                        pi_j = pi_p[idx][nd2]
                        h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
                                   g.has_edge(pi_i, pi_j) and
                                   g.edges[pi_i, pi_j][edge_label] == label)
                        h_ij0 += h_ij0_p
                    h_ij0_list.append(h_ij0)
                    label_list.append(label)
                # choose one of the best randomly.
                idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
                h_ij0_max = h_ij0_list[idx_max[0]]
                idx_rdm = random.randint(0, len(idx_max) - 1)
                best_label = label_list[idx_max[idx_rdm]]
                # check whether a_ij is 0 or 1: sij_norm counts graphs where
                # the mapped pair is adjacent.
                sij_norm = 0
                for idx, g in enumerate(Gn):
                    pi_i = pi_p[idx][nd1]
                    pi_j = pi_p[idx][nd2]
                    if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
                        sij_norm += 1
                # keep the edge iff the best-label support beats the cost
                # threshold (set-median update rule for labeled edges).
                if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
                    if not G_new.has_edge(nd1, nd2):
                        G_new.add_edge(nd1, nd2)
                    G_new.edges[nd1, nd2][edge_label] = best_label
                else:
                    if G_new.has_edge(nd1, nd2):
                        G_new.remove_edge(nd1, nd2)
        else:  # if edges are unlabeled
            # NOTE(review): only edges already present in G are visited, so
            # this branch can remove edges but never insert new ones between
            # currently non-adjacent nodes — confirm this is intended
            # (cf. test_iam_with_more_graphs_as_init, which scans all pairs).
            for nd1, nd2, _ in G.edges(data=True):
                sij_norm = 0
                for idx, g in enumerate(Gn):
                    pi_i = pi_p[idx][nd1]
                    pi_j = pi_p[idx][nd2]
                    if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
                        sij_norm += 1
                if sij_norm > len(Gn) * c_er / (c_er + c_ei):
                    if not G_new.has_edge(nd1, nd2):
                        G_new.add_edge(nd1, nd2)
                else:
                    if G_new.has_edge(nd1, nd2):
                        G_new.remove_edge(nd1, nd2)
        G = G_new.copy()
        # update pi_p: recompute node maps from the new candidate to each graph.
        pi_p = []
        for idx1, G_p in enumerate(Gn):
            dist_tmp, pi_tmp, _ = GED(G, G_p)
            pi_p.append(pi_tmp)
    return G
def GED(g1, g2, lib='gedlib'):
    """Compute the graph edit distance between ``g1`` and ``g2``.

    With ``lib='gedlib'`` the pair is written out as a temporary GXL
    dataset, loaded into GedLib, and solved with the "IPFP" method under
    the "LETTER" edit-cost model; the GedLib upper bound is returned as
    the distance.

    Returns
    -------
    dis : float
        GED estimate (the GedLib upper bound).
    pi_forward : list
        Node map g1 -> g2, expressed in the graphs' own node ids;
        removed nodes are mapped to ``np.inf``.
    pi_backward : list
        Node map g2 -> g1, same convention.
    """
    if lib == 'gedlib':
        # transform dataset to the 'xml' file as the GedLib required.
        saveDataset([g1, g2], [None, None], group='xml', filename='ged_tmp/tmp')
#        script.appel()
        script.PyRestartEnv()
        script.PyLoadGXLGraph('ged_tmp/', 'ged_tmp/tmp.xml')
        listID = script.PyGetGraphIds()
        script.PySetEditCost("LETTER")  # ("CHEM_1")
        script.PyInitEnv()
        script.PySetMethod("IPFP", "")
        script.PyInitMethod()
        g = listID[0]
        h = listID[1]
        script.PyRunMethod(g, h)
        pi_forward, pi_backward = script.PyGetAllMap(g, h)
        upper = script.PyGetUpperBound(g, h)
        lower = script.PyGetLowerBound(g, h)  # NOTE(review): computed but unused.
        dis = upper
    # make the map label correct (label remove map as np.inf):
    # GedLib returns indices; an index >= the other graph's node count
    # denotes a node removal/insertion.
    nodes1 = [n for n in g1.nodes()]
    nodes2 = [n for n in g2.nodes()]
    nb1 = nx.number_of_nodes(g1)
    nb2 = nx.number_of_nodes(g2)
    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
    return dis, pi_forward, pi_backward
  168. def median_distance(Gn, Gn_median, measure='ged', verbose=False):
  169. dis_list = []
  170. pi_forward_list = []
  171. for idx, G in tqdm(enumerate(Gn), desc='computing median distances',
  172. file=sys.stdout) if verbose else enumerate(Gn):
  173. dis_sum = 0
  174. pi_forward_list.append([])
  175. for G_p in Gn_median:
  176. dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p)
  177. pi_forward_list[idx].append(pi_tmp_forward)
  178. dis_sum += dis_tmp
  179. dis_list.append(dis_sum)
  180. return dis_list, pi_forward_list
  181. # --------------------------- These are tests --------------------------------#
def test_iam_with_more_graphs_as_init(Gn, G_candidate, c_ei=3, c_er=3, c_es=1,
                                      node_label='atom', edge_label='bond_type'):
    """IAM median computation initialized from a separate candidate set.

    Same alternate-minimization scheme as :func:`iam`, but the initial
    set-median is chosen from ``G_candidate`` (by smallest GED sum to the
    graphs of ``Gn``) instead of from ``Gn`` itself, and the unlabeled-edge
    update scans all node pairs so it can also insert new edges.

    Parameters
    ----------
    Gn : list of networkx.Graph
        The graphs whose median is sought.
    G_candidate : list of networkx.Graph
        Candidate initializations for the median.
    c_ei, c_er, c_es : float
        Edit costs for edge insertion, removal and substitution.
    node_label, edge_label : str
        Attribute names of the symbolic node / edge labels.

    Returns
    -------
    networkx.Graph
        The final candidate median graph.
    """
#    Gn = Gn[0:10]
    # relabel nodes to 0..n-1 so node ids can index the GED node maps.
    Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]
    # phase 1: initilize.
    # compute set-median over the candidates.
    dis_min = np.inf
#    pi_p = []
    pi_all_forward = []
    pi_all_backward = []
    for idx1, G_p in tqdm(enumerate(G_candidate), desc='computing GEDs', file=sys.stdout):
        dist_sum = 0
        pi_all_forward.append([])
        pi_all_backward.append([])
        for idx2, G_p_prime in enumerate(Gn):
            dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G_p, G_p_prime)
            pi_all_forward[idx1].append(pi_tmp_forward)
            pi_all_backward[idx1].append(pi_tmp_backward)
            dist_sum += dist_tmp
        # "<=" keeps the LAST candidate among ties (iam uses "<").
        if dist_sum <= dis_min:
            dis_min = dist_sum
            G = G_p.copy()
            idx_min = idx1
    # list of edit operations.
    pi_p_forward = pi_all_forward[idx_min]
    pi_p_backward = pi_all_backward[idx_min]
    # phase 2: iteration.
    ds_attrs = get_dataset_attributes(Gn + [G], attr_names=['edge_labeled', 'node_attr_dim'],
                                      edge_label=edge_label)
    label_set = get_node_labels(Gn + [G], node_label)
    for itr in range(0, 10):  # @todo: the convergence condition?
        G_new = G.copy()
        # update vertex labels.
        # pre-compute h_i0 for each label.
#        for label in get_node_labels(Gn, node_label):
#            print(label)
#        for nd in G.nodes(data=True):
#            pass
        if not ds_attrs['node_attr_dim']:  # labels are symbolic
            for nd in G.nodes():
                h_i0_list = []
                label_list = []
                for label in label_set:
                    # count graphs whose mapped node carries this label.
                    h_i0 = 0
                    for idx, g in enumerate(Gn):
                        pi_i = pi_p_forward[idx][nd]
                        if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
                            h_i0 += 1
                    h_i0_list.append(h_i0)
                    label_list.append(label)
                # choose one of the best randomly.
                idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
                idx_rdm = random.randint(0, len(idx_max) - 1)
                G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
        else:  # labels are non-symbolic: average the mapped attribute vectors.
            for nd in G.nodes():
                Si_norm = 0
                phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
                for idx, g in enumerate(Gn):
                    pi_i = pi_p_forward[idx][nd]
                    if g.has_node(pi_i):  # @todo: what if no g has node? phi_i_bar = 0?
                        Si_norm += 1
                        phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
                phi_i_bar /= Si_norm
                G_new.nodes[nd]['attributes'] = phi_i_bar
        # update edge labels and adjacency matrix.
        if ds_attrs['edge_labeled']:
            for nd1, nd2, _ in G.edges(data=True):
                h_ij0_list = []
                label_list = []
                for label in get_edge_labels(Gn, edge_label):
                    # support for this edge label across the mapped pairs.
                    h_ij0 = 0
                    for idx, g in enumerate(Gn):
                        pi_i = pi_p_forward[idx][nd1]
                        pi_j = pi_p_forward[idx][nd2]
                        h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
                                   g.has_edge(pi_i, pi_j) and
                                   g.edges[pi_i, pi_j][edge_label] == label)
                        h_ij0 += h_ij0_p
                    h_ij0_list.append(h_ij0)
                    label_list.append(label)
                # choose one of the best randomly.
                idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
                h_ij0_max = h_ij0_list[idx_max[0]]
                idx_rdm = random.randint(0, len(idx_max) - 1)
                best_label = label_list[idx_max[idx_rdm]]
                # check whether a_ij is 0 or 1: count graphs where the
                # mapped pair is adjacent.
                sij_norm = 0
                for idx, g in enumerate(Gn):
                    pi_i = pi_p_forward[idx][nd1]
                    pi_j = pi_p_forward[idx][nd2]
                    if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
                        sij_norm += 1
                if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
                    if not G_new.has_edge(nd1, nd2):
                        G_new.add_edge(nd1, nd2)
                    G_new.edges[nd1, nd2][edge_label] = best_label
                else:
                    if G_new.has_edge(nd1, nd2):
                        G_new.remove_edge(nd1, nd2)
        else:  # if edges are unlabeled
            # @todo: works only for undirected graphs.
            # scan all node pairs, so new edges can be inserted as well.
            for nd1 in range(nx.number_of_nodes(G)):
                for nd2 in range(nd1 + 1, nx.number_of_nodes(G)):
                    sij_norm = 0
                    for idx, g in enumerate(Gn):
                        pi_i = pi_p_forward[idx][nd1]
                        pi_j = pi_p_forward[idx][nd2]
                        if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
                            sij_norm += 1
                    if sij_norm > len(Gn) * c_er / (c_er + c_ei):
                        if not G_new.has_edge(nd1, nd2):
                            G_new.add_edge(nd1, nd2)
                    elif sij_norm < len(Gn) * c_er / (c_er + c_ei):
                        if G_new.has_edge(nd1, nd2):
                            G_new.remove_edge(nd1, nd2)
                    # do not change anything when equal.
        G = G_new.copy()
        # update pi_p: recompute forward node maps from the new candidate.
        pi_p_forward = []
        for G_p in Gn:
            dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p)
            pi_p_forward.append(pi_tmp_forward)
    return G
def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
        Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, node_label='atom',
        edge_label='bond_type', connected=False):
    """IAM variant: branch on every best label choice and allow node removal.

    Like :func:`test_iam_with_more_graphs_as_init`, but instead of picking
    one best node label at random, EVERY best label (including the special
    "remove this node" option) spawns a candidate graph, and all candidates
    are carried through the iterations.  Iteration stops when the sum of
    distances (SOD) changes by less than ``epsilon`` or after ``ite_max``
    rounds.

    Parameters
    ----------
    Gn_median : list of networkx.Graph
        Graphs whose median is sought.
    Gn_candidate : list of networkx.Graph
        Candidate initializations.
    c_ei, c_er, c_es : float
        Edit costs for edge insertion, removal and substitution.
    node_label, edge_label : str
        Attribute names of the symbolic node / edge labels.
    connected : bool
        If True, prefer connected candidate medians when any exist.

    Returns
    -------
    (G_min_list, dis_min)
        The best median graphs found and their (minimal) SOD.
    """
    from tqdm import tqdm
#    Gn_median = Gn_median[0:10]
#    Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median]
    node_ir = np.inf  # corresponding to the node remove and insertion.
    label_r = 'thanksdanny'  # the label for node remove. # @todo: make this label unrepeatable.
    ds_attrs = get_dataset_attributes(Gn_median + Gn_candidate,
                                      attr_names=['edge_labeled', 'node_attr_dim', 'edge_attr_dim'],
                                      edge_label=edge_label)
    ite_max = 50       # hard cap on iterations per candidate.
    epsilon = 0.001    # SOD convergence tolerance.

    def generate_graph(G, pi_p_forward, label_set):
        """One IAM update step on G; returns all "best" graphs it spawns,
        with their node maps and distances to Gn_median."""
        G_new_list = [G.copy()]  # all "best" graphs generated in this iteration.
#        nx.draw_networkx(G)
#        import matplotlib.pyplot as plt
#        plt.show()
#        print(pi_p_forward)
        # update vertex labels.
        # pre-compute h_i0 for each label.
#        for label in get_node_labels(Gn, node_label):
#            print(label)
#        for nd in G.nodes(data=True):
#            pass
        if not ds_attrs['node_attr_dim']:  # labels are symbolic
            # ndi indexes the node map (positional); nd is the node id.
            for ndi, (nd, _) in enumerate(G.nodes(data=True)):
                h_i0_list = []
                label_list = []
                for label in label_set:
                    h_i0 = 0
                    for idx, g in enumerate(Gn_median):
                        pi_i = pi_p_forward[idx][ndi]
                        if pi_i != node_ir and g.nodes[pi_i][node_label] == label:
                            h_i0 += 1
                    h_i0_list.append(h_i0)
                    label_list.append(label)
                # case when the node is to be removed.
                h_i0_remove = 0
                for idx, g in enumerate(Gn_median):
                    pi_i = pi_p_forward[idx][ndi]
                    if pi_i == node_ir:
                        h_i0_remove += 1
                h_i0_list.append(h_i0_remove)
                label_list.append(label_r)
                # get the best labels.
                idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
                nlabel_best = [label_list[idx] for idx in idx_max]
                # generate "best" graphs with regard to "best" node labels:
                # each current candidate branches once per best label.
                G_new_list_nd = []
                for g in G_new_list:  # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now.
                    for nl in nlabel_best:
                        g_tmp = g.copy()
                        if nl == label_r:
                            g_tmp.remove_node(nd)
                        else:
                            g_tmp.nodes[nd][node_label] = nl
                        G_new_list_nd.append(g_tmp)
#                        nx.draw_networkx(g_tmp)
#                        import matplotlib.pyplot as plt
#                        plt.show()
#                        print(g_tmp.nodes(data=True))
#                        print(g_tmp.edges(data=True))
                G_new_list = G_new_list_nd[:]
        else:  # labels are non-symbolic: average the mapped attribute vectors.
            for ndi, (nd, _) in enumerate(G.nodes(data=True)):
                Si_norm = 0
                phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
                for idx, g in enumerate(Gn_median):
                    pi_i = pi_p_forward[idx][ndi]
                    if g.has_node(pi_i):  # @todo: what if no g has node? phi_i_bar = 0?
                        Si_norm += 1
                        phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
                phi_i_bar /= Si_norm
                G_new_list[0].nodes[nd]['attributes'] = phi_i_bar
        # update edge labels and adjacency matrix.
        if ds_attrs['edge_labeled']:
            # FIXME(review): this branch references `G_new`, which is not
            # defined anywhere in this function — it will raise NameError on
            # edge-labeled datasets.  It should presumably operate on the
            # graphs in `G_new_list` (cf. the unlabeled branch below).
            for nd1, nd2, _ in G.edges(data=True):
                h_ij0_list = []
                label_list = []
                for label in get_edge_labels(Gn_median, edge_label):
                    h_ij0 = 0
                    for idx, g in enumerate(Gn_median):
                        pi_i = pi_p_forward[idx][nd1]
                        pi_j = pi_p_forward[idx][nd2]
                        h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
                                   g.has_edge(pi_i, pi_j) and
                                   g.edges[pi_i, pi_j][edge_label] == label)
                        h_ij0 += h_ij0_p
                    h_ij0_list.append(h_ij0)
                    label_list.append(label)
                # choose one of the best randomly.
                idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
                h_ij0_max = h_ij0_list[idx_max[0]]
                idx_rdm = random.randint(0, len(idx_max) - 1)
                best_label = label_list[idx_max[idx_rdm]]
                # check whether a_ij is 0 or 1.
                sij_norm = 0
                for idx, g in enumerate(Gn_median):
                    pi_i = pi_p_forward[idx][nd1]
                    pi_j = pi_p_forward[idx][nd2]
                    if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
                        sij_norm += 1
                if h_ij0_max > len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
                    if not G_new.has_edge(nd1, nd2):
                        G_new.add_edge(nd1, nd2)
                    G_new.edges[nd1, nd2][edge_label] = best_label
                else:
                    if G_new.has_edge(nd1, nd2):
                        G_new.remove_edge(nd1, nd2)
        else:  # if edges are unlabeled
            # @todo: works only for undirected graphs.
            nd_list = [n for n in G.nodes()]
            for g_tmp in G_new_list:
                for nd1i in range(nx.number_of_nodes(G)):
                    nd1 = nd_list[nd1i]
                    for nd2i in range(nd1i + 1, nx.number_of_nodes(G)):
                        nd2 = nd_list[nd2i]
                        sij_norm = 0
                        for idx, g in enumerate(Gn_median):
                            pi_i = pi_p_forward[idx][nd1i]
                            pi_j = pi_p_forward[idx][nd2i]
                            if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
                                sij_norm += 1
                        if sij_norm > len(Gn_median) * c_er / (c_er + c_ei):
                            # @todo: should we consider if nd1 and nd2 in g_tmp?
                            # or just add the edge anyway?
                            if g_tmp.has_node(nd1) and g_tmp.has_node(nd2) \
                                    and not g_tmp.has_edge(nd1, nd2):
                                g_tmp.add_edge(nd1, nd2)
                        elif sij_norm < len(Gn_median) * c_er / (c_er + c_ei):
                            if g_tmp.has_edge(nd1, nd2):
                                g_tmp.remove_edge(nd1, nd2)
                        # do not change anything when equal.
#        # find the best graph generated in this iteration and update pi_p.
        # @todo: should we update all graphs generated or just the best ones?
        dis_list, pi_forward_list = median_distance(G_new_list, Gn_median)
        # @todo: should we remove the identical and connectivity check?
        # Don't know which is faster.
        if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
            G_new_list, idx_list = remove_duplicates(G_new_list)
            pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
            dis_list = [dis_list[idx] for idx in idx_list]
#        if connected == True:
#            G_new_list, idx_list = remove_disconnected(G_new_list)
#            pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
#        idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
#        dis_min = dis_list[idx_min_tmp_list[0]]
#        pi_forward_list = [pi_forward_list[idx] for idx in idx_min_list]
#        G_new_list = [G_new_list[idx] for idx in idx_min_list]
#        for g in G_new_list:
#            import matplotlib.pyplot as plt
#            nx.draw_networkx(g)
#            plt.show()
#            print(g.nodes(data=True))
#            print(g.edges(data=True))
        return G_new_list, pi_forward_list, dis_list

    def best_median_graphs(Gn_candidate, pi_all_forward, dis_all):
        """Return the candidates (and maps) achieving the minimal distance."""
        idx_min_list = np.argwhere(dis_all == np.min(dis_all)).flatten().tolist()
        dis_min = dis_all[idx_min_list[0]]
        pi_forward_min_list = [pi_all_forward[idx] for idx in idx_min_list]
        G_min_list = [Gn_candidate[idx] for idx in idx_min_list]
        return G_min_list, pi_forward_min_list, dis_min

    def iteration_proc(G, pi_p_forward, cur_sod):
        """Iterate generate_graph from one initialization until the SOD
        converges (|ΔSOD| <= epsilon) or ite_max rounds pass."""
        G_list = [G]
        pi_forward_list = [pi_p_forward]
        # old_sod chosen so the first |old_sod - cur_sod| check passes
        # (= cur_sod; NOTE(review): fails to start when cur_sod <= epsilon).
        old_sod = cur_sod * 2
        sod_list = [cur_sod]
        # iterations.
        itr = 0
        while itr < ite_max and np.abs(old_sod - cur_sod) > epsilon:
            # for itr in range(0, 5): # the convergence condition?
            print('itr is', itr)
            G_new_list = []
            pi_forward_new_list = []
            dis_new_list = []
            for idx, G in enumerate(G_list):
                label_set = get_node_labels(Gn_median + [G], node_label)
                G_tmp_list, pi_forward_tmp_list, dis_tmp_list = generate_graph(
                    G, pi_forward_list[idx], label_set)
                G_new_list += G_tmp_list
                pi_forward_new_list += pi_forward_tmp_list
                dis_new_list += dis_tmp_list
            G_list = G_new_list[:]
            pi_forward_list = pi_forward_new_list[:]
            dis_list = dis_new_list[:]
            old_sod = cur_sod
            cur_sod = np.min(dis_list)
            sod_list.append(cur_sod)
            itr += 1
        # @todo: do we return all graphs or the best ones?
        # get the best ones of the generated graphs.
        G_list, pi_forward_list, dis_min = best_median_graphs(
            G_list, pi_forward_list, dis_list)
        if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
            G_list, idx_list = remove_duplicates(G_list)
            pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
#            dis_list = [dis_list[idx] for idx in idx_list]
#        import matplotlib.pyplot as plt
#        for g in G_list:
#            nx.draw_networkx(g)
#            plt.show()
#            print(g.nodes(data=True))
#            print(g.edges(data=True))
        print('\nsods:', sod_list, '\n')
        return G_list, pi_forward_list, dis_min

    def remove_duplicates(Gn):
        """Remove duplicate graphs from list.

        Returns the deduplicated list and the surviving indices.
        """
        Gn_new = []
        idx_list = []
        for idx, g in enumerate(Gn):
            dupl = False
            for g_new in Gn_new:
                if graph_isIdentical(g_new, g):
                    dupl = True
                    break
            if not dupl:
                Gn_new.append(g)
                idx_list.append(idx)
        return Gn_new, idx_list

    def remove_disconnected(Gn):
        """Remove disconnected graphs from list.

        Returns the connected graphs and their original indices.
        """
        Gn_new = []
        idx_list = []
        for idx, g in enumerate(Gn):
            if nx.is_connected(g):
                Gn_new.append(g)
                idx_list.append(idx)
        return Gn_new, idx_list

    # phase 1: initilize.
    # compute set-median.
    dis_min = np.inf
    dis_list, pi_forward_all = median_distance(Gn_candidate, Gn_median)
    # find all smallest distances.
    idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
    dis_min = dis_list[idx_min_list[0]]
    # phase 2: iteration — run the minimization from every tied initialization.
    G_list = []
    dis_list = []
    pi_forward_list = []
    for idx_min in idx_min_list:
#        print('idx_min is', idx_min)
        G = Gn_candidate[idx_min].copy()
        # list of edit operations.
        pi_p_forward = pi_forward_all[idx_min]
#        pi_p_backward = pi_all_backward[idx_min]
        Gi_list, pi_i_forward_list, dis_i_min = iteration_proc(G, pi_p_forward, dis_min)
        G_list += Gi_list
        # NOTE(review): G_list grows by len(Gi_list) per round but dis_list
        # by one — the two lists can diverge in length before they are fed to
        # best_median_graphs below; confirm this pairing is intended.
        dis_list.append(dis_i_min)
        pi_forward_list += pi_i_forward_list
    if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
        G_list, idx_list = remove_duplicates(G_list)
        dis_list = [dis_list[idx] for idx in idx_list]
        pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
    if connected == True:
        G_list_con, idx_list = remove_disconnected(G_list)
        # if there is no connected graphs at all, then remain the disconnected ones.
        if len(G_list_con) > 0:  # @todo: ??????????????????????????
            G_list = G_list_con
            dis_list = [dis_list[idx] for idx in idx_list]
            pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
#    import matplotlib.pyplot as plt
#    for g in G_list:
#        nx.draw_networkx(g)
#        plt.show()
#        print(g.nodes(data=True))
#        print(g.edges(data=True))
    # get the best median graphs
#    dis_list, pi_forward_list = median_distance(G_list, Gn_median)
    G_min_list, pi_forward_min_list, dis_min = best_median_graphs(
        G_list, pi_forward_list, dis_list)
#    for g in G_min_list:
#        nx.draw_networkx(g)
#        plt.show()
#        print(g.nodes(data=True))
#        print(g.edges(data=True))
    return G_min_list, dis_min
  589. if __name__ == '__main__':
  590. from pygraph.utils.graphfiles import loadDataset
  591. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
  592. 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb
  593. # ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
  594. # 'extra_params': {}} # node nsymb
  595. # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
  596. # 'extra_params': {}}
  597. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  598. iam(Gn)

A Python package for graph kernels, graph edit distances and the graph pre-image problem.