You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

utils.py 23 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741
  1. import networkx as nx
  2. import numpy as np
  3. from copy import deepcopy
  4. from enum import Enum, unique
  5. #from itertools import product
  6. # from tqdm import tqdm
  def getSPLengths(G1):
      """Return the matrix of unweighted shortest-path lengths of a graph.

      Parameters
      ----------
      G1 : NetworkX graph
          The graph to analyse. Nodes are used directly as row/column indices
          of the result, so they must be the integers
          ``0 .. number_of_nodes - 1``.

      Return
      ------
      distances : numpy.ndarray
          Square array in which ``distances[i, j]`` is the number of edges on a
          shortest path from ``i`` to ``j``. Entries of node pairs for which no
          path was reported keep their initial value 0.
      """
      # Recent NetworkX releases return an iterator of (source, paths) pairs
      # instead of a dict when neither source nor target is given; dict()
      # accepts both forms.
      sp = dict(nx.shortest_path(G1))
      n_nodes = G1.number_of_nodes()
      distances = np.zeros((n_nodes, n_nodes))
      for i, paths_from_i in sp.items():
          for j, path in paths_from_i.items():
              # a path with k nodes has k - 1 edges.
              distances[i, j] = len(path) - 1
      return distances
  def getSPGraph(G, edge_weight=None):
      """Build the shortest-paths graph of G.

      Parameters
      ----------
      G : NetworkX graph
          The graph to be transformed.
      edge_weight : string
          Edge attribute corresponding to the edge weight.

      Return
      ------
      S : NetworkX graph
          The shortest-paths graph corresponding to G.

      Notes
      ------
      The shortest-paths graph S has the same nodes as G. Two nodes of S are
      joined by an edge whenever they are connected by a walk in G, and that
      edge is labeled with the shortest distance between the two nodes. The
      work is delegated to ``floydTransformation``.

      References
      ----------
      .. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
      """
      shortest_paths_graph = floydTransformation(G, edge_weight=edge_weight)
      return shortest_paths_graph
  def floydTransformation(G, edge_weight=None):
      """Build the shortest-paths graph of G with the Floyd transformation.

      Parameters
      ----------
      G : NetworkX graph
          The graph to be transformed.
      edge_weight : string
          Edge attribute corresponding to the edge weight. It is forwarded as
          ``weight`` to ``nx.floyd_warshall_numpy``. The default is None.

      Return
      ------
      S : NetworkX graph
          Undirected graph with the nodes (and node attributes) of G. Each pair
          of nodes at finite distance is joined by an edge whose ``cost``
          attribute holds that distance.

      References
      ----------
      .. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
      """
      dist = nx.floyd_warshall_numpy(G, weight=edge_weight)
      nodes = list(G.nodes())
      n_nodes = G.number_of_nodes()

      S = nx.Graph()
      S.add_nodes_from(G.nodes(data=True))
      # only the upper triangle (i < j) of the distance matrix is read.
      for i in range(n_nodes):
          for j in range(i + 1, n_nodes):
              if dist[i, j] != np.inf:
                  S.add_edge(nodes[i], nodes[j], cost=dist[i, j])
      return S
  def get_shortest_paths(G, weight, directed):
      """Get all shortest paths of a graph.

      Parameters
      ----------
      G : NetworkX graphs
          The graphs whose paths are calculated.
      weight : string/None
          Edge attribute used as weight to calculate the shortest path.
      directed : boolean
          Whether graph is directed.

      Return
      ------
      sp : list of list
          List of shortest paths of the graph, where each path is represented
          by a list of nodes. Every node also contributes a length-0 path
          ``[node]``.
      """
      from itertools import combinations, permutations

      if directed:
          # A path from n1 to n2 says nothing about n2 -> n1 in a directed
          # graph, so every ordered pair has to be queried.
          node_pairs = permutations(G.nodes(), 2)
      else:
          node_pairs = combinations(G.nodes(), 2)

      sp = []
      for n1, n2 in node_pairs:
          try:
              paths = list(nx.all_shortest_paths(G, n1, n2, weight=weight))
          except nx.NetworkXNoPath:  # nodes not connected
              continue
          sp += paths
          if not directed:
              # each path is counted twice, once from each of its end nodes.
              sp += [path[::-1] for path in paths]

      # add single nodes as length 0 paths.
      sp += [[n] for n in G.nodes()]
      return sp
  def untotterTransformation(G, node_label, edge_label):
      """Transform graph G following Mahé et al. to filter out tottering
      patterns of the marginalized kernel and the tree pattern kernel.

      Parameters
      ----------
      G : NetworkX graph
          The graph to be transformed.
      node_label : string
          Node attribute used as label.
      edge_label : string
          Edge attribute used as label.

      Return
      ------
      gt : NetworkX graph
          The transformed graph corresponding to G, with nodes renumbered by
          consecutive integers starting from 0. The previous node identifier is
          kept in the node attribute 'label_orignal'.

      References
      ----------
      .. [1] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and Jean-Philippe Vert. Extensions of marginalized graph kernels. In Proceedings of the twenty-first international conference on Machine learning, page 70. ACM, 2004.
      """
      G = G.to_directed()
      gt = nx.Graph()
      gt.graph = G.graph  # the attribute dict is shared, not copied.
      gt.add_nodes_from(G.nodes(data=True))
      for arc in G.edges():
          tail, head = arc[0], arc[1]
          # every directed edge becomes a node carrying the label of its head.
          gt.add_node(arc)
          gt.nodes[arc][node_label] = G.nodes[head][node_label]
          # link the tail node to the new edge-node.
          gt.add_edge(tail, arc)
          gt.edges[tail, arc][edge_label] = G[tail][head][edge_label]
          # link the edge-node to each following edge that does not go
          # straight back to the tail.
          for neighbor in G[head]:
              if neighbor != tail:
                  following = (head, neighbor)
                  gt.add_edge(arc, following)
                  gt.edges[arc, following][edge_label] = G[head][neighbor][edge_label]

      # relabel nodes using consecutive integers for convenience of kernel calculation.
      gt = nx.convert_node_labels_to_integers(
          gt, first_label=0, label_attribute='label_orignal')
      return gt
  def direct_product(G1, G2, node_label, edge_label):
      """Return the direct/tensor product of directed graphs G1 and G2.

      Parameters
      ----------
      G1, G2 : NetworkX graph
          The original graphs.
      node_label : string
          Node attribute used as label.
      edge_label : string
          Edge attribute used as label.

      Return
      ------
      gt : NetworkX graph
          The direct product graph of G1 and G2 (a ``DiGraph`` whose nodes are
          pairs ``(node of G1, node of G2)``).

      Notes
      -----
      Unlike networkx.tensor_product, only nodes and edges of G1 and G2 that
      carry the same label are added to the product graph.

      References
      ----------
      .. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003.
      """
      from itertools import product

      gt = nx.DiGraph()

      # nodes: one per pair of equally labeled nodes.
      for u, v in product(G1, G2):
          label = G1.nodes[u][node_label]
          if label == G2.nodes[v][node_label]:
              gt.add_node((u, v))
              gt.nodes[(u, v)][node_label] = label

      # edges: iterate over pairs of edges, which is fast for sparse graphs
      # (looping over pairs of product nodes would suit dense graphs better).
      for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
          start, end = (u1, u2), (v1, v2)
          if start in gt and end in gt:
              label = G1.edges[u1, v1][edge_label]
              if label == G2.edges[u2, v2][edge_label]:
                  gt.add_edge(start, end)
                  gt.edges[start, end][edge_label] = label

      return gt
  def direct_product_graph(G1, G2, node_labels, edge_labels):
      """Return the direct/tensor product of directed graphs G1 and G2 when
      nodes and edges may carry several labels.

      Parameters
      ----------
      G1, G2 : NetworkX graph
          The original graphs.
      node_labels : list
          A list of node attributes used as labels.
      edge_labels : list
          A list of edge attributes used as labels.

      Return
      ------
      gt : NetworkX graph
          The direct product graph of G1 and G2 (a ``DiGraph``). Product nodes
          store the shared label tuple in the attribute ``node_label`` and
          product edges in the attribute ``edge_label``.

      Notes
      -----
      Unlike networkx.tensor_product, only nodes and edges of G1 and G2 that
      carry the same labels are added to the product graph.

      References
      ----------
      .. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003.
      """
      from itertools import product

      gt = nx.DiGraph()

      # nodes: one per pair of nodes with identical label tuples.
      for u, v in product(G1, G2):
          labels_u = tuple(G1.nodes[u][nl] for nl in node_labels)
          labels_v = tuple(G2.nodes[v][nl] for nl in node_labels)
          if labels_u == labels_v:
              gt.add_node((u, v), node_label=labels_u)

      # edges: iterate over pairs of edges, which is fast for sparse graphs
      # (looping over pairs of product nodes would suit dense graphs better).
      for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
          start, end = (u1, u2), (v1, v2)
          if start in gt and end in gt:
              labels_1 = tuple(G1.edges[u1, v1][el] for el in edge_labels)
              labels_2 = tuple(G2.edges[u2, v2][el] for el in edge_labels)
              if labels_1 == labels_2:
                  gt.add_edge(start, end, edge_label=labels_1)

      return gt
  def graph_deepcopy(G):
      """Copy a graph, deep-copying every graph, node and edge attribute value.

      Parameters
      ----------
      G : NetworkX graph
          The graph to copy.

      Return
      ------
      G_copy : NetworkX graph
          A new ``DiGraph`` if G is directed, otherwise a new ``Graph``. Node
          identifiers are reused as they are; only attribute values are
          deep-copied.
      """
      def _copied(attrs):
          # deep copy of the values of an attribute dictionary.
          return {key: deepcopy(value) for key, value in attrs.items()}

      graph_attrs = _copied(G.graph)
      graph_class = nx.DiGraph if G.is_directed() else nx.Graph
      G_copy = graph_class(**graph_attrs)

      for nd, attrs in G.nodes(data=True):
          G_copy.add_node(nd, **_copied(attrs))

      for nd1, nd2, attrs in G.edges(data=True):
          G_copy.add_edge(nd1, nd2, **_copied(attrs))

      return G_copy
  def graph_isIdentical(G1, G2):
      """Tell whether two graphs have the same nodes, edges, node
      labels/attributes and edge labels/attributes.

      Notes
      -----
      1. The type of graphs has to be the same.
      2. Nodes and edges are compared in iteration order.
      3. Global/Graph attributes are ignored as they may contain names for
         graphs.
      """
      # nodes (with their attributes) must match first.
      if list(G1.nodes(data=True)) != list(G2.nodes(data=True)):
          return False
      # then edges (with their attributes).
      if list(G1.edges(data=True)) != list(G2.edges(data=True)):
          return False
      return True
  def get_node_labels(Gn, node_label):
      """Collect the set of values taken by the node attribute ``node_label``
      over all graphs of the dataset Gn.
      """
      labels = set()
      for graph in Gn:
          labels.update(nx.get_node_attributes(graph, node_label).values())
      return labels
  def get_edge_labels(Gn, edge_label):
      """Collect the set of values taken by the edge attribute ``edge_label``
      over all graphs of the dataset Gn.
      """
      labels = set()
      for graph in Gn:
          labels.update(nx.get_edge_attributes(graph, edge_label).values())
      return labels
  def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options=None, **kwargs):
      """Instantiate the graph kernel class of ``gklearn.kernels`` called ``name``.

      Parameters
      ----------
      name : string
          Class name of the kernel (e.g. 'ShortestPath') or its lower-case
          spelled-out alias (e.g. 'shortest path').
      node_labels, edge_labels, node_attrs, edge_attrs : list, optional
          Label/attribute names. Only those the selected kernel takes are
          forwarded to its constructor.
      ds_infos : dict, optional
          Dataset information, forwarded to every kernel.
      kernel_options : dict, optional
          Extra keyword arguments for the kernel constructor. The default is
          None, which means no extra arguments.
      **kwargs
          If any are given they replace ``kernel_options`` entirely.

      Return
      ------
      graph_kernel : object
          The kernel instance.

      Raises
      ------
      ValueError
          If ``name`` does not match any known kernel.
      """
      import importlib

      symbolic = ('node_labels', 'edge_labels')
      all_labels = ('node_labels', 'edge_labels', 'node_attrs', 'edge_attrs')
      # (accepted names, label arguments taken by the constructor besides
      # ds_infos); the first accepted name is the class name in gklearn.kernels.
      kernel_specs = (
          (('CommonWalk', 'common walk'), symbolic),
          (('Marginalized', 'marginalized'), symbolic),
          (('SylvesterEquation', 'sylvester equation'), ()),
          (('FixedPoint', 'fixed point'), all_labels),
          (('ConjugateGradient', 'conjugate gradient'), all_labels),
          (('SpectralDecomposition', 'spectral decomposition'), all_labels),
          (('ShortestPath', 'shortest path'), ('node_labels', 'node_attrs')),
          (('StructuralSP', 'structural shortest path'), all_labels),
          (('PathUpToH', 'path up to length h'), symbolic),
          (('Treelet', 'treelet'), symbolic),
          (('WLSubtree', 'weisfeiler-lehman subtree'), symbolic),
          (('WeisfeilerLehman', 'weisfeiler-lehman'), symbolic),
      )

      if kernel_options is None:
          kernel_options = {}
      if len(kwargs) != 0:
          kernel_options = kwargs

      available = {
          'node_labels': node_labels,
          'edge_labels': edge_labels,
          'node_attrs': node_attrs,
          'edge_attrs': edge_attrs,
      }
      for aliases, arg_names in kernel_specs:
          if name in aliases:
              # imported lazily so that gklearn is only needed on a match.
              kernels_module = importlib.import_module('gklearn.kernels')
              kernel_class = getattr(kernels_module, aliases[0])
              label_args = {key: available[key] for key in arg_names}
              # separate ** expansions keep the TypeError raised when
              # kernel_options repeats one of the explicit arguments.
              return kernel_class(ds_infos=ds_infos, **label_args, **kernel_options)

      choices = ', '.join('"%s"' % aliases[0] for aliases, _ in kernel_specs)
      raise ValueError('The graph kernel given is not defined. Possible choices include: ' + choices + '.')
  def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None, edge_required=False):
      """Compute one unnormalized Gram matrix per target class of a predefined
      dataset and optionally save them.

      Parameters
      ----------
      ds_name : string
          Name passed to ``Dataset.load_predefined_dataset``.
      kernel_options : dict
          Options of the graph kernel. ``kernel_options['name']`` selects the
          kernel; the whole dict is also forwarded to the kernel constructor
          and to its ``compute`` method.
      save_results : boolean, optional
          Whether to write the results to disk. The default is True.
      dir_save : string, optional
          Prefix of the output file; it is concatenated as is with the file
          name. The default is '', i.e. the current working directory.
      irrelevant_labels : dict, optional
          Keyword arguments for ``Dataset.remove_labels``. The default is None
          (nothing is removed).
      edge_required : boolean, optional
          Forwarded to ``Dataset.trim_dataset``. The default is False.
      """
      import os
      from gklearn.utils import Dataset, split_dataset_by_target

      # 1. get dataset.
      print('1. getting dataset...')
      dataset_all = Dataset()
      dataset_all.load_predefined_dataset(ds_name)
      dataset_all.trim_dataset(edge_required=edge_required)
      if irrelevant_labels is not None:
          dataset_all.remove_labels(**irrelevant_labels)
      datasets = split_dataset_by_target(dataset_all)

      gram_matrix_unnorm_list = []
      run_time_list = []

      print('start generating preimage for each class of target...')
      for dataset in datasets:
          target = dataset.targets[0]
          print('\ntarget =', target, '\n')

          # 2. initialize graph kernel.
          print('2. initializing graph kernel and setting parameters...')
          graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
                                                  node_labels=dataset.node_labels,
                                                  edge_labels=dataset.edge_labels,
                                                  node_attrs=dataset.node_attrs,
                                                  edge_attrs=dataset.edge_attrs,
                                                  ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                                  kernel_options=kernel_options)

          # 3. compute gram matrix.
          print('3. computing gram matrix...')
          _, run_time = graph_kernel.compute(dataset.graphs, **kernel_options)
          gram_matrix_unnorm_list.append(graph_kernel.gram_matrix_unnorm)
          run_time_list.append(run_time)

      # 4. save results.
      print()
      print('4. saving results...')
      if save_results:
          # os.makedirs('') raises FileNotFoundError, so the default empty
          # prefix (current directory) must skip directory creation.
          if dir_save:
              os.makedirs(dir_save, exist_ok=True)
          # NOTE(review): the per-class matrices may differ in shape; confirm
          # that the installed numpy accepts such a ragged list in savez.
          np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list)

      print('\ncomplete.')
  def find_paths(G, source_node, length):
      """Find all paths of a given length that start from a source node, using
      a recursive depth first search.

      Parameters
      ----------
      G : NetworkX graphs
          The graph in which paths are searched.
      source_node : integer
          The node from where all paths start.
      length : integer
          The length of paths, counted in edges.

      Return
      ------
      path : list of list
          List of paths retrieved, where each path is represented by a list of
          nodes. No node appears twice in a path.
      """
      if length == 0:
          return [[source_node]]
      paths = []
      for neighbor in G[source_node]:
          for tail in find_paths(G, neighbor, length - 1):
              # skip continuations that would come back to the source node.
              if source_node not in tail:
                  paths.append([source_node] + tail)
      return paths
  def find_all_paths(G, length, is_directed):
      """Find all paths with a certain length in a graph. A recursive depth
      first search is applied.

      Parameters
      ----------
      G : NetworkX graphs
          The graph in which paths are searched.
      length : integer
          The length of paths.
      is_directed : boolean
          Whether the graph is directed. For undirected graphs a path and its
          reverse count as one path and only one of them is returned.

      Return
      ------
      path : list of list
          List of paths retrieved, where each path is represented by a list of
          nodes.
      """
      all_paths = []
      for node in G:
          all_paths.extend(find_paths(G, node, length))

      if not is_directed:
          # Each path is found twice, once from each of its end nodes. Drop a
          # path when its reverse occurs later in the list. Scanning backwards
          # with a set of the paths already seen does this in a single pass.
          seen_later = set()
          kept = []
          for path in reversed(all_paths):
              if tuple(path[::-1]) not in seen_later:
                  kept.append(path)
              seen_later.add(tuple(path))
          kept.reverse()  # restore the original relative order.
          all_paths = kept

      return all_paths
  def get_mlti_dim_node_attrs(G, attr_names):
      """Return, for every node of G in iteration order, the tuple of its
      values for the attributes listed in ``attr_names``.
      """
      return [tuple(attrs[aname] for aname in attr_names)
              for _, attrs in G.nodes(data=True)]
  def get_mlti_dim_edge_attrs(G, attr_names):
      """Return, for every edge of G in iteration order, the tuple of its
      values for the attributes listed in ``attr_names``.

      Parameters
      ----------
      G : NetworkX graph
          The graph whose edges are read.
      attr_names : list
          Names of the edge attributes to collect.

      Return
      ------
      attributes : list of tuple
          One tuple of attribute values per edge.
      """
      # G.edges(data=True) yields (u, v, attrs) triples; only the attribute
      # dictionary, i.e. the last item, is needed.
      return [tuple(attrs[aname] for aname in attr_names)
              for *_, attrs in G.edges(data=True)]
  def normalize_gram_matrix(gram_matrix):
      """Normalize a Gram matrix in place by dividing entry (i, j) by
      ``sqrt(diag[i] * diag[j])``, where ``diag`` is the diagonal of the input.

      Only the upper triangle is read; the lower triangle is overwritten with
      its mirror image. Invalid floating-point operations (such as the square
      root of a negative number) raise ``FloatingPointError``; the numpy error
      settings are restored afterwards in every case.

      Return
      ------
      gram_matrix : numpy.ndarray
          The same array object that was passed in, now normalized.
      """
      diag = gram_matrix.diagonal().copy()
      size = len(gram_matrix)
      # turn "invalid value" warnings into FloatingPointError while computing.
      with np.errstate(invalid='raise'):
          for i in range(size):
              for j in range(i, size):
                  gram_matrix[i][j] /= np.sqrt(diag[i] * diag[j])
                  gram_matrix[j][i] = gram_matrix[i][j]
      return gram_matrix
  def compute_distance_matrix(gram_matrix):
      """Compute the pairwise distances induced by a Gram matrix.

      The distance between items i and j is
      ``sqrt(k(i, i) + k(j, j) - 2 * k(i, j))``. Only the upper triangle of
      ``gram_matrix`` is read and the result is mirrored.

      Return
      ------
      dis_mat : numpy.ndarray
          The symmetric distance matrix.
      dis_max : float
          Largest distance.
      dis_min : float
          Smallest non-zero distance.
      dis_mean : float
          Mean over all entries of ``dis_mat``.

      Raises
      ------
      ValueError
          If a squared distance is not greater than -1e-10 (negative values
          closer to zero than that are treated as rounding noise and set to 0).
      """
      size = len(gram_matrix)
      dis_mat = np.empty((size, size))
      for i in range(size):
          for j in range(i, size):
              squared = gram_matrix[i, i] + gram_matrix[j, j] - 2 * gram_matrix[i, j]
              if squared < 0:
                  if squared > -1e-10:
                      squared = 0
                  else:
                      raise ValueError('The distance is negative.')
              dis_mat[i, j] = np.sqrt(squared)
              dis_mat[j, i] = dis_mat[i, j]

      nonzero = dis_mat[dis_mat != 0]
      dis_max = np.max(dis_mat)
      dis_min = np.min(nonzero)
      dis_mean = np.mean(dis_mat)
      return dis_mat, dis_max, dis_min, dis_mean
  521. # @todo: use it in ShortestPath.
  def compute_vertex_kernels(g1, g2, node_kernels, node_labels=[], node_attrs=[]):
      """Compute kernels between each pair of vertices in two graphs.

      Parameters
      ----------
      g1, g2 : NetworkX graph
          The kernels between pairs of vertices in these two graphs are
          computed.
      node_kernels : dict
          A dictionary of kernel functions for nodes with the keys 'symb' (for
          symbolic node labels), 'nsymb' (for non-symbolic node labels) and
          'mix' (for both). The 'symb' and 'nsymb' functions are called with
          the label lists of the two nodes. The 'mix' function is called with
          four lists: the symbolic labels of the first and of the second node,
          then the non-symbolic labels of the first and of the second node.
          Each function returns the kernel value. Not used when nodes are
          unlabeled.
      node_labels : list, optional
          The list of the name strings of the node labels. The default is [].
      node_attrs : list, optional
          The list of the name strings of the node attributes. The default is [].

      Returns
      -------
      vk_dict : dict
          Vertex kernels keyed by pairs ``(node of g1, node of g2)``. The dict
          is empty when both ``node_labels`` and ``node_attrs`` are empty.

      Notes
      -----
      The docstring this one replaces states that the function is used by
      ``gklearn.kernels.FixedPoint`` and ``gklearn.kernels.StructuralSP`` and
      that the method is borrowed from FCSP [1].

      References
      ----------
      .. [1] Lifan Xu, Wei Wang, M Alvarez, John Cavazos, and Dongping Zhang.
         Parallelization of shortest path graph kernels on multi-core cpus and gpus.
         Proceedings of the Programmability Issues for Heterogeneous Multicores
         (MultiProg), Vienna, Austria, 2014.
      """
      use_labels = len(node_labels) > 0
      use_attrs = len(node_attrs) > 0

      vk_dict = {}
      if not use_labels and not use_attrs:
          # unlabeled nodes: nothing to compare. @todo: add edge weights.
          return vk_dict

      # select the kernel function once, before visiting any node.
      if use_labels and use_attrs:
          kn = node_kernels['mix']
      elif use_labels:
          kn = node_kernels['symb']
      else:
          kn = node_kernels['nsymb']

      for v1, data1 in g1.nodes(data=True):
          for v2, data2 in g2.nodes(data=True):
              args = []
              if use_labels:
                  args.append([data1[nl] for nl in node_labels])
                  args.append([data2[nl] for nl in node_labels])
              if use_attrs:
                  args.append([data1[na] for na in node_attrs])
                  args.append([data2[na] for na in node_attrs])
              vk_dict[(v1, v2)] = kn(*args)

      return vk_dict
  def dummy_node():
      """Return the ID used for a dummy node.

      Return
      ------
      float
          ``numpy.inf``.
      """
      # @todo: in GEDLIB, this is the max - 1 rather than max, I don't know why.
      return np.inf
  def undefined_node():
      """Return the ID used for an undefined node.

      Return
      ------
      float
          ``numpy.inf``.
      """
      return np.inf
  def dummy_edge():
      """Return the ID used for a dummy edge.

      Return
      ------
      float
          ``numpy.inf``.
      """
      return np.inf
  @unique
  class SpecialLabel(Enum):
      """Enumeration of special labels; values are unique.
      """
      # The value is written out explicitly because enum.auto does not exist
      # in Python 3.5.
      DUMMY = 1  # The dummy label.

A Python package for graph kernels, graph edit distances and graph pre-image problem.