You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

utils.py 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. import networkx as nx
  2. import numpy as np
  3. from copy import deepcopy
  4. #from itertools import product
  5. # from tqdm import tqdm
  6. def getSPLengths(G1):
  7. sp = nx.shortest_path(G1)
  8. distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes()))
  9. for i in sp.keys():
  10. for j in sp[i].keys():
  11. distances[i, j] = len(sp[i][j]) - 1
  12. return distances
  13. def getSPGraph(G, edge_weight=None):
  14. """Transform graph G to its corresponding shortest-paths graph.
  15. Parameters
  16. ----------
  17. G : NetworkX graph
  18. The graph to be tramsformed.
  19. edge_weight : string
  20. edge attribute corresponding to the edge weight.
  21. Return
  22. ------
  23. S : NetworkX graph
  24. The shortest-paths graph corresponding to G.
  25. Notes
  26. ------
  27. For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, while there exists an edge between all nodes in S which are connected by a walk in G. Every edge in S between two nodes is labeled by the shortest distance between these two nodes.
  28. References
  29. ----------
  30. .. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
  31. """
  32. return floydTransformation(G, edge_weight=edge_weight)
  33. def floydTransformation(G, edge_weight=None):
  34. """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation.
  35. Parameters
  36. ----------
  37. G : NetworkX graph
  38. The graph to be tramsformed.
  39. edge_weight : string
  40. edge attribute corresponding to the edge weight. The default edge weight is bond_type.
  41. Return
  42. ------
  43. S : NetworkX graph
  44. The shortest-paths graph corresponding to G.
  45. References
  46. ----------
  47. .. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
  48. """
  49. spMatrix = nx.floyd_warshall_numpy(G, weight=edge_weight)
  50. S = nx.Graph()
  51. S.add_nodes_from(G.nodes(data=True))
  52. ns = list(G.nodes())
  53. for i in range(0, G.number_of_nodes()):
  54. for j in range(i + 1, G.number_of_nodes()):
  55. if spMatrix[i, j] != np.inf:
  56. S.add_edge(ns[i], ns[j], cost=spMatrix[i, j])
  57. return S
  58. def get_shortest_paths(G, weight, directed):
  59. """Get all shortest paths of a graph.
  60. Parameters
  61. ----------
  62. G : NetworkX graphs
  63. The graphs whose paths are calculated.
  64. weight : string/None
  65. edge attribute used as weight to calculate the shortest path.
  66. directed: boolean
  67. Whether graph is directed.
  68. Return
  69. ------
  70. sp : list of list
  71. List of shortest paths of the graph, where each path is represented by a list of nodes.
  72. """
  73. from itertools import combinations
  74. sp = []
  75. for n1, n2 in combinations(G.nodes(), 2):
  76. try:
  77. spltemp = list(nx.all_shortest_paths(G, n1, n2, weight=weight))
  78. except nx.NetworkXNoPath: # nodes not connected
  79. pass
  80. else:
  81. sp += spltemp
  82. # each edge walk is counted twice, starting from both its extreme nodes.
  83. if not directed:
  84. sp += [sptemp[::-1] for sptemp in spltemp]
  85. # add single nodes as length 0 paths.
  86. sp += [[n] for n in G.nodes()]
  87. return sp
  88. def untotterTransformation(G, node_label, edge_label):
  89. """Transform graph G according to Mahé et al.'s work to filter out tottering patterns of marginalized kernel and tree pattern kernel.
  90. Parameters
  91. ----------
  92. G : NetworkX graph
  93. The graph to be tramsformed.
  94. node_label : string
  95. node attribute used as label. The default node label is 'atom'.
  96. edge_label : string
  97. edge attribute used as label. The default edge label is 'bond_type'.
  98. Return
  99. ------
  100. gt : NetworkX graph
  101. The transformed graph corresponding to G.
  102. References
  103. ----------
  104. .. [1] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and Jean-Philippe Vert. Extensions of marginalized graph kernels. In Proceedings of the twenty-first international conference on Machine learning, page 70. ACM, 2004.
  105. """
  106. # arrange all graphs in a list
  107. G = G.to_directed()
  108. gt = nx.Graph()
  109. gt.graph = G.graph
  110. gt.add_nodes_from(G.nodes(data=True))
  111. for edge in G.edges():
  112. gt.add_node(edge)
  113. gt.nodes[edge].update({node_label: G.nodes[edge[1]][node_label]})
  114. gt.add_edge(edge[0], edge)
  115. gt.edges[edge[0], edge].update({
  116. edge_label:
  117. G[edge[0]][edge[1]][edge_label]
  118. })
  119. for neighbor in G[edge[1]]:
  120. if neighbor != edge[0]:
  121. gt.add_edge(edge, (edge[1], neighbor))
  122. gt.edges[edge, (edge[1], neighbor)].update({
  123. edge_label:
  124. G[edge[1]][neighbor][edge_label]
  125. })
  126. # nx.draw_networkx(gt)
  127. # plt.show()
  128. # relabel nodes using consecutive integers for convenience of kernel calculation.
  129. gt = nx.convert_node_labels_to_integers(
  130. gt, first_label=0, label_attribute='label_orignal')
  131. return gt
  132. def direct_product(G1, G2, node_label, edge_label):
  133. """Return the direct/tensor product of directed graphs G1 and G2.
  134. Parameters
  135. ----------
  136. G1, G2 : NetworkX graph
  137. The original graphs.
  138. node_label : string
  139. node attribute used as label. The default node label is 'atom'.
  140. edge_label : string
  141. edge attribute used as label. The default edge label is 'bond_type'.
  142. Return
  143. ------
  144. gt : NetworkX graph
  145. The direct product graph of G1 and G2.
  146. Notes
  147. -----
  148. This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to the direct product graph.
  149. References
  150. ----------
  151. .. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003.
  152. """
  153. # arrange all graphs in a list
  154. from itertools import product
  155. # G = G.to_directed()
  156. gt = nx.DiGraph()
  157. # add nodes
  158. for u, v in product(G1, G2):
  159. if G1.nodes[u][node_label] == G2.nodes[v][node_label]:
  160. gt.add_node((u, v))
  161. gt.nodes[(u, v)].update({node_label: G1.nodes[u][node_label]})
  162. # add edges, faster for sparse graphs (no so many edges), which is the most case for now.
  163. for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
  164. if (u1, u2) in gt and (
  165. v1, v2
  166. ) in gt and G1.edges[u1, v1][edge_label] == G2.edges[u2,
  167. v2][edge_label]:
  168. gt.add_edge((u1, u2), (v1, v2))
  169. gt.edges[(u1, u2), (v1, v2)].update({
  170. edge_label:
  171. G1.edges[u1, v1][edge_label]
  172. })
  173. # # add edges, faster for dense graphs (a lot of edges, complete graph would be super).
  174. # for u, v in product(gt, gt):
  175. # if (u[0], v[0]) in G1.edges and (
  176. # u[1], v[1]
  177. # ) in G2.edges and G1.edges[u[0],
  178. # v[0]][edge_label] == G2.edges[u[1],
  179. # v[1]][edge_label]:
  180. # gt.add_edge((u[0], u[1]), (v[0], v[1]))
  181. # gt.edges[(u[0], u[1]), (v[0], v[1])].update({
  182. # edge_label:
  183. # G1.edges[u[0], v[0]][edge_label]
  184. # })
  185. # relabel nodes using consecutive integers for convenience of kernel calculation.
  186. # gt = nx.convert_node_labels_to_integers(
  187. # gt, first_label=0, label_attribute='label_orignal')
  188. return gt
  189. def graph_deepcopy(G):
  190. """Deep copy a graph, including deep copy of all nodes, edges and
  191. attributes of the graph, nodes and edges.
  192. Note
  193. ----
  194. It is the same as the NetworkX function graph.copy(), as far as I know.
  195. """
  196. # add graph attributes.
  197. labels = {}
  198. for k, v in G.graph.items():
  199. labels[k] = deepcopy(v)
  200. if G.is_directed():
  201. G_copy = nx.DiGraph(**labels)
  202. else:
  203. G_copy = nx.Graph(**labels)
  204. # add nodes
  205. for nd, attrs in G.nodes(data=True):
  206. labels = {}
  207. for k, v in attrs.items():
  208. labels[k] = deepcopy(v)
  209. G_copy.add_node(nd, **labels)
  210. # add edges.
  211. for nd1, nd2, attrs in G.edges(data=True):
  212. labels = {}
  213. for k, v in attrs.items():
  214. labels[k] = deepcopy(v)
  215. G_copy.add_edge(nd1, nd2, **labels)
  216. return G_copy
  217. def graph_isIdentical(G1, G2):
  218. """Check if two graphs are identical, including: same nodes, edges, node
  219. labels/attributes, edge labels/attributes.
  220. Notes
  221. -----
  222. 1. The type of graphs has to be the same.
  223. 2. Global/Graph attributes are neglected as they may contain names for graphs.
  224. """
  225. # check nodes.
  226. nlist1 = [n for n in G1.nodes(data=True)]
  227. nlist2 = [n for n in G2.nodes(data=True)]
  228. if not nlist1 == nlist2:
  229. return False
  230. # check edges.
  231. elist1 = [n for n in G1.edges(data=True)]
  232. elist2 = [n for n in G2.edges(data=True)]
  233. if not elist1 == elist2:
  234. return False
  235. # check graph attributes.
  236. return True
  237. def get_node_labels(Gn, node_label):
  238. """Get node labels of dataset Gn.
  239. """
  240. nl = set()
  241. for G in Gn:
  242. nl = nl | set(nx.get_node_attributes(G, node_label).values())
  243. return nl
  244. def get_edge_labels(Gn, edge_label):
  245. """Get edge labels of dataset Gn.
  246. """
  247. el = set()
  248. for G in Gn:
  249. el = el | set(nx.get_edge_attributes(G, edge_label).values())
  250. return el
  251. def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}):
  252. if name == 'structuralspkernel':
  253. from gklearn.kernels import StructuralSP
  254. graph_kernel = StructuralSP(node_labels=node_labels, edge_labels=edge_labels,
  255. node_attrs=node_attrs, edge_attrs=edge_attrs,
  256. ds_infos=ds_infos, **kernel_options)
  257. return graph_kernel
  258. def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None):
  259. from gklearn.utils import Dataset, split_dataset_by_target
  260. # 1. get dataset.
  261. print('1. getting dataset...')
  262. dataset_all = Dataset()
  263. dataset_all.load_predefined_dataset(ds_name)
  264. if not irrelevant_labels is None:
  265. dataset_all.remove_labels(**irrelevant_labels)
  266. # dataset_all.cut_graphs(range(0, 10))
  267. datasets = split_dataset_by_target(dataset_all)
  268. gram_matrix_unnorm_list = []
  269. run_time_list = []
  270. print('start generating preimage for each class of target...')
  271. for idx, dataset in enumerate(datasets):
  272. target = dataset.targets[0]
  273. print('\ntarget =', target, '\n')
  274. # 2. initialize graph kernel.
  275. print('2. initializing graph kernel and setting parameters...')
  276. graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
  277. node_labels=dataset.node_labels,
  278. edge_labels=dataset.edge_labels,
  279. node_attrs=dataset.node_attrs,
  280. edge_attrs=dataset.edge_attrs,
  281. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  282. kernel_options=kernel_options)
  283. # 3. compute gram matrix.
  284. print('3. computing gram matrix...')
  285. gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options)
  286. gram_matrix_unnorm = graph_kernel.gram_matrix_unnorm
  287. gram_matrix_unnorm_list.append(gram_matrix_unnorm)
  288. run_time_list.append(run_time)
  289. # 4. save results.
  290. print()
  291. print('4. saving results...')
  292. if save_results:
  293. np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list)
  294. print('\ncomplete.')

A Python package for graph kernels, graph edit distances and graph pre-image problem.