You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

utils.py 24 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814
  1. import networkx as nx
  2. import numpy as np
  3. from copy import deepcopy
  4. from enum import Enum, unique
  5. #from itertools import product
  6. # from tqdm import tqdm
  7. #%%
  8. def getSPLengths(G1):
  9. sp = nx.shortest_path(G1)
  10. distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes()))
  11. for i in sp.keys():
  12. for j in sp[i].keys():
  13. distances[i, j] = len(sp[i][j]) - 1
  14. return distances
  15. def getSPGraph(G, edge_weight=None):
  16. """Transform graph G to its corresponding shortest-paths graph.
  17. Parameters
  18. ----------
  19. G : NetworkX graph
  20. The graph to be tramsformed.
  21. edge_weight : string
  22. edge attribute corresponding to the edge weight.
  23. Return
  24. ------
  25. S : NetworkX graph
  26. The shortest-paths graph corresponding to G.
  27. Notes
  28. ------
  29. For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, while there exists an edge between all nodes in S which are connected by a walk in G. Every edge in S between two nodes is labeled by the shortest distance between these two nodes.
  30. References
  31. ----------
  32. .. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
  33. """
  34. return floydTransformation(G, edge_weight=edge_weight)
  35. def floydTransformation(G, edge_weight=None):
  36. """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation.
  37. Parameters
  38. ----------
  39. G : NetworkX graph
  40. The graph to be tramsformed.
  41. edge_weight : string
  42. edge attribute corresponding to the edge weight. The default edge weight is bond_type.
  43. Return
  44. ------
  45. S : NetworkX graph
  46. The shortest-paths graph corresponding to G.
  47. References
  48. ----------
  49. .. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
  50. """
  51. spMatrix = nx.floyd_warshall_numpy(G, weight=edge_weight)
  52. S = nx.Graph()
  53. S.add_nodes_from(G.nodes(data=True))
  54. ns = list(G.nodes())
  55. for i in range(0, G.number_of_nodes()):
  56. for j in range(i + 1, G.number_of_nodes()):
  57. if spMatrix[i, j] != np.inf:
  58. S.add_edge(ns[i], ns[j], cost=spMatrix[i, j])
  59. return S
  60. def get_shortest_paths(G, weight, directed):
  61. """Get all shortest paths of a graph.
  62. Parameters
  63. ----------
  64. G : NetworkX graphs
  65. The graphs whose paths are calculated.
  66. weight : string/None
  67. edge attribute used as weight to calculate the shortest path.
  68. directed: boolean
  69. Whether graph is directed.
  70. Return
  71. ------
  72. sp : list of list
  73. List of shortest paths of the graph, where each path is represented by a list of nodes.
  74. """
  75. from itertools import combinations
  76. sp = []
  77. for n1, n2 in combinations(G.nodes(), 2):
  78. try:
  79. spltemp = list(nx.all_shortest_paths(G, n1, n2, weight=weight))
  80. except nx.NetworkXNoPath: # nodes not connected
  81. pass
  82. else:
  83. sp += spltemp
  84. # each edge walk is counted twice, starting from both its extreme nodes.
  85. if not directed:
  86. sp += [sptemp[::-1] for sptemp in spltemp]
  87. # add single nodes as length 0 paths.
  88. sp += [[n] for n in G.nodes()]
  89. return sp
  90. def untotterTransformation(G, node_label, edge_label):
  91. """Transform graph G according to Mahé et al.'s work to filter out tottering patterns of marginalized kernel and tree pattern kernel.
  92. Parameters
  93. ----------
  94. G : NetworkX graph
  95. The graph to be tramsformed.
  96. node_label : string
  97. node attribute used as label. The default node label is 'atom'.
  98. edge_label : string
  99. edge attribute used as label. The default edge label is 'bond_type'.
  100. Return
  101. ------
  102. gt : NetworkX graph
  103. The transformed graph corresponding to G.
  104. References
  105. ----------
  106. .. [1] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and Jean-Philippe Vert. Extensions of marginalized graph kernels. In Proceedings of the twenty-first international conference on Machine learning, page 70. ACM, 2004.
  107. """
  108. # arrange all graphs in a list
  109. G = G.to_directed()
  110. gt = nx.Graph()
  111. gt.graph = G.graph
  112. gt.add_nodes_from(G.nodes(data=True))
  113. for edge in G.edges():
  114. gt.add_node(edge)
  115. gt.nodes[edge].update({node_label: G.nodes[edge[1]][node_label]})
  116. gt.add_edge(edge[0], edge)
  117. gt.edges[edge[0], edge].update({
  118. edge_label:
  119. G[edge[0]][edge[1]][edge_label]
  120. })
  121. for neighbor in G[edge[1]]:
  122. if neighbor != edge[0]:
  123. gt.add_edge(edge, (edge[1], neighbor))
  124. gt.edges[edge, (edge[1], neighbor)].update({
  125. edge_label:
  126. G[edge[1]][neighbor][edge_label]
  127. })
  128. # nx.draw_networkx(gt)
  129. # plt.show()
  130. # relabel nodes using consecutive integers for convenience of kernel calculation.
  131. gt = nx.convert_node_labels_to_integers(
  132. gt, first_label=0, label_attribute='label_orignal')
  133. return gt
  134. def direct_product(G1, G2, node_label, edge_label):
  135. """Return the direct/tensor product of directed graphs G1 and G2.
  136. Parameters
  137. ----------
  138. G1, G2 : NetworkX graph
  139. The original graphs.
  140. node_label : string
  141. node attribute used as label. The default node label is 'atom'.
  142. edge_label : string
  143. edge attribute used as label. The default edge label is 'bond_type'.
  144. Return
  145. ------
  146. gt : NetworkX graph
  147. The direct product graph of G1 and G2.
  148. Notes
  149. -----
  150. This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to the direct product graph.
  151. References
  152. ----------
  153. .. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003.
  154. """
  155. # arrange all graphs in a list
  156. from itertools import product
  157. # G = G.to_directed()
  158. gt = nx.DiGraph()
  159. # add nodes
  160. for u, v in product(G1, G2):
  161. if G1.nodes[u][node_label] == G2.nodes[v][node_label]:
  162. gt.add_node((u, v))
  163. gt.nodes[(u, v)].update({node_label: G1.nodes[u][node_label]})
  164. # add edges, faster for sparse graphs (no so many edges), which is the most case for now.
  165. for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
  166. if (u1, u2) in gt and (
  167. v1, v2
  168. ) in gt and G1.edges[u1, v1][edge_label] == G2.edges[u2,
  169. v2][edge_label]:
  170. gt.add_edge((u1, u2), (v1, v2))
  171. gt.edges[(u1, u2), (v1, v2)].update({
  172. edge_label:
  173. G1.edges[u1, v1][edge_label]
  174. })
  175. # # add edges, faster for dense graphs (a lot of edges, complete graph would be super).
  176. # for u, v in product(gt, gt):
  177. # if (u[0], v[0]) in G1.edges and (
  178. # u[1], v[1]
  179. # ) in G2.edges and G1.edges[u[0],
  180. # v[0]][edge_label] == G2.edges[u[1],
  181. # v[1]][edge_label]:
  182. # gt.add_edge((u[0], u[1]), (v[0], v[1]))
  183. # gt.edges[(u[0], u[1]), (v[0], v[1])].update({
  184. # edge_label:
  185. # G1.edges[u[0], v[0]][edge_label]
  186. # })
  187. # relabel nodes using consecutive integers for convenience of kernel calculation.
  188. # gt = nx.convert_node_labels_to_integers(
  189. # gt, first_label=0, label_attribute='label_orignal')
  190. return gt
  191. def direct_product_graph(G1, G2, node_labels, edge_labels):
  192. """Return the direct/tensor product of directed graphs G1 and G2.
  193. Parameters
  194. ----------
  195. G1, G2 : NetworkX graph
  196. The original graphs.
  197. node_labels : list
  198. A list of node attributes used as labels.
  199. edge_labels : list
  200. A list of edge attributes used as labels.
  201. Return
  202. ------
  203. gt : NetworkX graph
  204. The direct product graph of G1 and G2.
  205. Notes
  206. -----
  207. This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to the direct product graph.
  208. References
  209. ----------
  210. .. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003.
  211. """
  212. # arrange all graphs in a list
  213. from itertools import product
  214. # G = G.to_directed()
  215. gt = nx.DiGraph()
  216. # add nodes
  217. for u, v in product(G1, G2):
  218. label1 = tuple(G1.nodes[u][nl] for nl in node_labels)
  219. label2 = tuple(G2.nodes[v][nl] for nl in node_labels)
  220. if label1 == label2:
  221. gt.add_node((u, v), node_label=label1)
  222. # add edges, faster for sparse graphs (no so many edges), which is the most case for now.
  223. for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
  224. if (u1, u2) in gt and (v1, v2) in gt:
  225. label1 = tuple(G1.edges[u1, v1][el] for el in edge_labels)
  226. label2 = tuple(G2.edges[u2, v2][el] for el in edge_labels)
  227. if label1 == label2:
  228. gt.add_edge((u1, u2), (v1, v2), edge_label=label1)
  229. # # add edges, faster for dense graphs (a lot of edges, complete graph would be super).
  230. # for u, v in product(gt, gt):
  231. # if (u[0], v[0]) in G1.edges and (
  232. # u[1], v[1]
  233. # ) in G2.edges and G1.edges[u[0],
  234. # v[0]][edge_label] == G2.edges[u[1],
  235. # v[1]][edge_label]:
  236. # gt.add_edge((u[0], u[1]), (v[0], v[1]))
  237. # gt.edges[(u[0], u[1]), (v[0], v[1])].update({
  238. # edge_label:
  239. # G1.edges[u[0], v[0]][edge_label]
  240. # })
  241. # relabel nodes using consecutive integers for convenience of kernel calculation.
  242. # gt = nx.convert_node_labels_to_integers(
  243. # gt, first_label=0, label_attribute='label_orignal')
  244. return gt
  245. def find_paths(G, source_node, length):
  246. """Find all paths with a certain length those start from a source node.
  247. A recursive depth first search is applied.
  248. Parameters
  249. ----------
  250. G : NetworkX graphs
  251. The graph in which paths are searched.
  252. source_node : integer
  253. The number of the node from where all paths start.
  254. length : integer
  255. The length of paths.
  256. Return
  257. ------
  258. path : list of list
  259. List of paths retrieved, where each path is represented by a list of nodes.
  260. """
  261. if length == 0:
  262. return [[source_node]]
  263. path = [[source_node] + path for neighbor in G[source_node] \
  264. for path in find_paths(G, neighbor, length - 1) if source_node not in path]
  265. return path
  266. def find_all_paths(G, length, is_directed):
  267. """Find all paths with a certain length in a graph. A recursive depth first
  268. search is applied.
  269. Parameters
  270. ----------
  271. G : NetworkX graphs
  272. The graph in which paths are searched.
  273. length : integer
  274. The length of paths.
  275. Return
  276. ------
  277. path : list of list
  278. List of paths retrieved, where each path is represented by a list of nodes.
  279. """
  280. all_paths = []
  281. for node in G:
  282. all_paths.extend(find_paths(G, node, length))
  283. if not is_directed:
  284. # For each path, two presentations are retrieved from its two extremities.
  285. # Remove one of them.
  286. all_paths_r = [path[::-1] for path in all_paths]
  287. for idx, path in enumerate(all_paths[:-1]):
  288. for path2 in all_paths_r[idx+1::]:
  289. if path == path2:
  290. all_paths[idx] = []
  291. break
  292. all_paths = list(filter(lambda a: a != [], all_paths))
  293. return all_paths
  294. # @todo: use it in ShortestPath.
  295. def compute_vertex_kernels(g1, g2, node_kernels, node_labels=[], node_attrs=[]):
  296. """Compute kernels between each pair of vertices in two graphs.
  297. Parameters
  298. ----------
  299. g1, g2 : NetworkX graph
  300. The kernels bewteen pairs of vertices in these two graphs are computed.
  301. node_kernels : dict
  302. A dictionary of kernel functions for nodes, including 3 items: 'symb'
  303. for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix'
  304. for both labels. The first 2 functions take two node labels as
  305. parameters, and the 'mix' function takes 4 parameters, a symbolic and a
  306. non-symbolic label for each the two nodes. Each label is in form of 2-D
  307. dimension array (n_samples, n_features). Each function returns a number
  308. as the kernel value. Ignored when nodes are unlabeled. This argument
  309. is designated to conjugate gradient method and fixed-point iterations.
  310. node_labels : list, optional
  311. The list of the name strings of the node labels. The default is [].
  312. node_attrs : list, optional
  313. The list of the name strings of the node attributes. The default is [].
  314. Returns
  315. -------
  316. vk_dict : dict
  317. Vertex kernels keyed by vertices.
  318. Notes
  319. -----
  320. This function is used by ``gklearn.kernels.FixedPoint'' and
  321. ``gklearn.kernels.StructuralSP''. The method is borrowed from FCSP [1].
  322. References
  323. ----------
  324. .. [1] Lifan Xu, Wei Wang, M Alvarez, John Cavazos, and Dongping Zhang.
  325. Parallelization of shortest path graph kernels on multi-core cpus and gpus.
  326. Proceedings of the Programmability Issues for Heterogeneous Multicores
  327. (MultiProg), Vienna, Austria, 2014.
  328. """
  329. vk_dict = {} # shortest path matrices dict
  330. if len(node_labels) > 0:
  331. # node symb and non-synb labeled
  332. if len(node_attrs) > 0:
  333. kn = node_kernels['mix']
  334. for n1 in g1.nodes(data=True):
  335. for n2 in g2.nodes(data=True):
  336. n1_labels = [n1[1][nl] for nl in node_labels]
  337. n2_labels = [n2[1][nl] for nl in node_labels]
  338. n1_attrs = [n1[1][na] for na in node_attrs]
  339. n2_attrs = [n2[1][na] for na in node_attrs]
  340. vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
  341. # node symb labeled
  342. else:
  343. kn = node_kernels['symb']
  344. for n1 in g1.nodes(data=True):
  345. for n2 in g2.nodes(data=True):
  346. n1_labels = [n1[1][nl] for nl in node_labels]
  347. n2_labels = [n2[1][nl] for nl in node_labels]
  348. vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
  349. else:
  350. # node non-synb labeled
  351. if len(node_attrs) > 0:
  352. kn = node_kernels['nsymb']
  353. for n1 in g1.nodes(data=True):
  354. for n2 in g2.nodes(data=True):
  355. n1_attrs = [n1[1][na] for na in node_attrs]
  356. n2_attrs = [n2[1][na] for na in node_attrs]
  357. vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
  358. # node unlabeled
  359. else:
  360. pass # @todo: add edge weights.
  361. # for e1 in g1.edges(data=True):
  362. # for e2 in g2.edges(data=True):
  363. # if e1[2]['cost'] == e2[2]['cost']:
  364. # kernel += 1
  365. # return kernel
  366. return vk_dict
  367. #%%
  368. def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}, **kwargs):
  369. if len(kwargs) != 0:
  370. kernel_options = kwargs
  371. if name == 'CommonWalk' or name == 'common walk':
  372. from gklearn.kernels import CommonWalk
  373. graph_kernel = CommonWalk(node_labels=node_labels,
  374. edge_labels=edge_labels,
  375. ds_infos=ds_infos,
  376. **kernel_options)
  377. elif name == 'Marginalized' or name == 'marginalized':
  378. from gklearn.kernels import Marginalized
  379. graph_kernel = Marginalized(node_labels=node_labels,
  380. edge_labels=edge_labels,
  381. ds_infos=ds_infos,
  382. **kernel_options)
  383. elif name == 'SylvesterEquation' or name == 'sylvester equation':
  384. from gklearn.kernels import SylvesterEquation
  385. graph_kernel = SylvesterEquation(
  386. ds_infos=ds_infos,
  387. **kernel_options)
  388. elif name == 'FixedPoint' or name == 'fixed point':
  389. from gklearn.kernels import FixedPoint
  390. graph_kernel = FixedPoint(node_labels=node_labels,
  391. edge_labels=edge_labels,
  392. node_attrs=node_attrs,
  393. edge_attrs=edge_attrs,
  394. ds_infos=ds_infos,
  395. **kernel_options)
  396. elif name == 'ConjugateGradient' or name == 'conjugate gradient':
  397. from gklearn.kernels import ConjugateGradient
  398. graph_kernel = ConjugateGradient(node_labels=node_labels,
  399. edge_labels=edge_labels,
  400. node_attrs=node_attrs,
  401. edge_attrs=edge_attrs,
  402. ds_infos=ds_infos,
  403. **kernel_options)
  404. elif name == 'SpectralDecomposition' or name == 'spectral decomposition':
  405. from gklearn.kernels import SpectralDecomposition
  406. graph_kernel = SpectralDecomposition(node_labels=node_labels,
  407. edge_labels=edge_labels,
  408. node_attrs=node_attrs,
  409. edge_attrs=edge_attrs,
  410. ds_infos=ds_infos,
  411. **kernel_options)
  412. elif name == 'ShortestPath' or name == 'shortest path':
  413. from gklearn.kernels import ShortestPath
  414. graph_kernel = ShortestPath(node_labels=node_labels,
  415. node_attrs=node_attrs,
  416. ds_infos=ds_infos,
  417. **kernel_options)
  418. elif name == 'StructuralSP' or name == 'structural shortest path':
  419. from gklearn.kernels import StructuralSP
  420. graph_kernel = StructuralSP(node_labels=node_labels,
  421. edge_labels=edge_labels,
  422. node_attrs=node_attrs,
  423. edge_attrs=edge_attrs,
  424. ds_infos=ds_infos,
  425. **kernel_options)
  426. elif name == 'PathUpToH' or name == 'path up to length h':
  427. from gklearn.kernels import PathUpToH
  428. graph_kernel = PathUpToH(node_labels=node_labels,
  429. edge_labels=edge_labels,
  430. ds_infos=ds_infos,
  431. **kernel_options)
  432. elif name == 'Treelet' or name == 'treelet':
  433. from gklearn.kernels import Treelet
  434. graph_kernel = Treelet(node_labels=node_labels,
  435. edge_labels=edge_labels,
  436. ds_infos=ds_infos,
  437. **kernel_options)
  438. elif name == 'WLSubtree' or name == 'weisfeiler-lehman subtree':
  439. from gklearn.kernels import WLSubtree
  440. graph_kernel = WLSubtree(node_labels=node_labels,
  441. edge_labels=edge_labels,
  442. ds_infos=ds_infos,
  443. **kernel_options)
  444. elif name == 'WeisfeilerLehman' or name == 'weisfeiler-lehman':
  445. from gklearn.kernels import WeisfeilerLehman
  446. graph_kernel = WeisfeilerLehman(node_labels=node_labels,
  447. edge_labels=edge_labels,
  448. ds_infos=ds_infos,
  449. **kernel_options)
  450. else:
  451. raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet", "WLSubtree", "WeisfeilerLehman".')
  452. return graph_kernel
  453. def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None, edge_required=False):
  454. import os
  455. from gklearn.utils import Dataset, split_dataset_by_target
  456. # 1. get dataset.
  457. print('1. getting dataset...')
  458. dataset_all = Dataset()
  459. dataset_all.load_predefined_dataset(ds_name)
  460. dataset_all.trim_dataset(edge_required=edge_required)
  461. if not irrelevant_labels is None:
  462. dataset_all.remove_labels(**irrelevant_labels)
  463. # dataset_all.cut_graphs(range(0, 10))
  464. datasets = split_dataset_by_target(dataset_all)
  465. gram_matrix_unnorm_list = []
  466. run_time_list = []
  467. print('start generating preimage for each class of target...')
  468. for idx, dataset in enumerate(datasets):
  469. target = dataset.targets[0]
  470. print('\ntarget =', target, '\n')
  471. # 2. initialize graph kernel.
  472. print('2. initializing graph kernel and setting parameters...')
  473. graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
  474. node_labels=dataset.node_labels,
  475. edge_labels=dataset.edge_labels,
  476. node_attrs=dataset.node_attrs,
  477. edge_attrs=dataset.edge_attrs,
  478. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  479. kernel_options=kernel_options)
  480. # 3. compute gram matrix.
  481. print('3. computing gram matrix...')
  482. gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options)
  483. gram_matrix_unnorm = graph_kernel.gram_matrix_unnorm
  484. gram_matrix_unnorm_list.append(gram_matrix_unnorm)
  485. run_time_list.append(run_time)
  486. # 4. save results.
  487. print()
  488. print('4. saving results...')
  489. if save_results:
  490. os.makedirs(dir_save, exist_ok=True)
  491. np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list)
  492. print('\ncomplete.')
  493. def normalize_gram_matrix(gram_matrix):
  494. diag = gram_matrix.diagonal().copy()
  495. old_settings = np.seterr(invalid='raise') # Catch FloatingPointError: invalid value encountered in sqrt.
  496. for i in range(len(gram_matrix)):
  497. for j in range(i, len(gram_matrix)):
  498. try:
  499. gram_matrix[i][j] /= np.sqrt(diag[i] * diag[j])
  500. except:
  501. # rollback()
  502. np.seterr(**old_settings)
  503. raise
  504. else:
  505. gram_matrix[j][i] = gram_matrix[i][j]
  506. np.seterr(**old_settings)
  507. return gram_matrix
  508. def compute_distance_matrix(gram_matrix):
  509. dis_mat = np.empty((len(gram_matrix), len(gram_matrix)))
  510. for i in range(len(gram_matrix)):
  511. for j in range(i, len(gram_matrix)):
  512. dis = gram_matrix[i, i] + gram_matrix[j, j] - 2 * gram_matrix[i, j]
  513. if dis < 0:
  514. if dis > -1e-10:
  515. dis = 0
  516. else:
  517. raise ValueError('The distance is negative.')
  518. dis_mat[i, j] = np.sqrt(dis)
  519. dis_mat[j, i] = dis_mat[i, j]
  520. dis_max = np.max(np.max(dis_mat))
  521. dis_min = np.min(np.min(dis_mat[dis_mat != 0]))
  522. dis_mean = np.mean(np.mean(dis_mat))
  523. return dis_mat, dis_max, dis_min, dis_mean
  524. #%%
  525. def graph_deepcopy(G):
  526. """Deep copy a graph, including deep copy of all nodes, edges and
  527. attributes of the graph, nodes and edges.
  528. Note
  529. ----
  530. - It is the same as the NetworkX function graph.copy(), as far as I know.
  531. - This function only supports Networkx.Graph and Networkx.DiGraph.
  532. """
  533. # add graph attributes.
  534. labels = {}
  535. for k, v in G.graph.items():
  536. labels[k] = deepcopy(v)
  537. if G.is_directed():
  538. G_copy = nx.DiGraph(**labels)
  539. else:
  540. G_copy = nx.Graph(**labels)
  541. # add nodes
  542. for nd, attrs in G.nodes(data=True):
  543. labels = {}
  544. for k, v in attrs.items():
  545. labels[k] = deepcopy(v)
  546. G_copy.add_node(nd, **labels)
  547. # add edges.
  548. for nd1, nd2, attrs in G.edges(data=True):
  549. labels = {}
  550. for k, v in attrs.items():
  551. labels[k] = deepcopy(v)
  552. G_copy.add_edge(nd1, nd2, **labels)
  553. return G_copy
  554. def graph_isIdentical(G1, G2):
  555. """Check if two graphs are identical, including: same nodes, edges, node
  556. labels/attributes, edge labels/attributes.
  557. Notes
  558. -----
  559. 1. The type of graphs has to be the same.
  560. 2. Global/Graph attributes are neglected as they may contain names for graphs.
  561. """
  562. # check nodes.
  563. nlist1 = [n for n in G1.nodes(data=True)]
  564. nlist2 = [n for n in G2.nodes(data=True)]
  565. if not nlist1 == nlist2:
  566. return False
  567. # check edges.
  568. elist1 = [n for n in G1.edges(data=True)]
  569. elist2 = [n for n in G2.edges(data=True)]
  570. if not elist1 == elist2:
  571. return False
  572. # check graph attributes.
  573. return True
  574. def get_node_labels(Gn, node_label):
  575. """Get node labels of dataset Gn.
  576. """
  577. nl = set()
  578. for G in Gn:
  579. nl = nl | set(nx.get_node_attributes(G, node_label).values())
  580. return nl
  581. def get_edge_labels(Gn, edge_label):
  582. """Get edge labels of dataset Gn.
  583. """
  584. el = set()
  585. for G in Gn:
  586. el = el | set(nx.get_edge_attributes(G, edge_label).values())
  587. return el
  588. def get_mlti_dim_node_attrs(G, attr_names):
  589. attributes = []
  590. for nd, attrs in G.nodes(data=True):
  591. attributes.append(tuple(attrs[aname] for aname in attr_names))
  592. return attributes
  593. def get_mlti_dim_edge_attrs(G, attr_names):
  594. attributes = []
  595. for ed, attrs in G.edges(data=True):
  596. attributes.append(tuple(attrs[aname] for aname in attr_names))
  597. return attributes
  598. def nx_permute_nodes(G, random_state=None):
  599. """Permute node indices in a NetworkX graph.
  600. Parameters
  601. ----------
  602. G : TYPE
  603. DESCRIPTION.
  604. random_state : TYPE, optional
  605. DESCRIPTION. The default is None.
  606. Returns
  607. -------
  608. G_new : TYPE
  609. DESCRIPTION.
  610. Notes
  611. -----
  612. - This function only supports Networkx.Graph and Networkx.DiGraph.
  613. """
  614. # @todo: relabel node with integers? (in case something went wrong...)
  615. # Add graph attributes.
  616. labels = {}
  617. for k, v in G.graph.items():
  618. labels[k] = deepcopy(v)
  619. if G.is_directed():
  620. G_new = nx.DiGraph(**labels)
  621. else:
  622. G_new = nx.Graph(**labels)
  623. # Create a random mapping old node indices <-> new indices.
  624. nb_nodes = nx.number_of_nodes(G)
  625. indices_orig = range(nb_nodes)
  626. idx_mapping = np.random.RandomState(seed=random_state).permutation(indices_orig)
  627. # Add nodes.
  628. nodes_orig = list(G.nodes)
  629. for i_orig in range(nb_nodes):
  630. i_new = idx_mapping[i_orig]
  631. labels = {}
  632. for k, v in G.nodes[nodes_orig[i_new]].items():
  633. labels[k] = deepcopy(v)
  634. G_new.add_node(nodes_orig[i_new], **labels)
  635. # Add edges.
  636. for nd1, nd2, attrs in G.edges(data=True):
  637. labels = {}
  638. for k, v in attrs.items():
  639. labels[k] = deepcopy(v)
  640. G_new.add_edge(nd1, nd2, **labels)
  641. # # create a random mapping old label -> new label
  642. # node_mapping = dict(zip(G.nodes(), np.random.RandomState(seed=random_state).permutation(G.nodes())))
  643. # # build a new graph
  644. # G_new = nx.relabel_nodes(G, node_mapping)
  645. return G_new
  646. #%%
  647. def dummy_node():
  648. """
  649. /*!
  650. * @brief Returns a dummy node.
  651. * @return ID of dummy node.
  652. */
  653. """
  654. return np.inf # @todo: in GEDLIB, this is the max - 1 rather than max, I don't know why.
  655. def undefined_node():
  656. """
  657. /*!
  658. * @brief Returns an undefined node.
  659. * @return ID of undefined node.
  660. */
  661. """
  662. return np.inf
  663. def dummy_edge():
  664. """
  665. /*!
  666. * @brief Returns a dummy edge.
  667. * @return ID of dummy edge.
  668. */
  669. """
  670. return np.inf
  671. @unique
  672. class SpecialLabel(Enum):
  673. """can be used to define special labels.
  674. """
  675. DUMMY = 1 # The dummy label.
  676. # DUMMY = auto # enum.auto does not exist in Python 3.5.

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.