You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

utils.py 14 kB

5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. import networkx as nx
  2. import numpy as np
  3. from copy import deepcopy
  4. #from itertools import product
  5. # from tqdm import tqdm
  6. def getSPLengths(G1):
  7. sp = nx.shortest_path(G1)
  8. distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes()))
  9. for i in sp.keys():
  10. for j in sp[i].keys():
  11. distances[i, j] = len(sp[i][j]) - 1
  12. return distances
  13. def getSPGraph(G, edge_weight=None):
  14. """Transform graph G to its corresponding shortest-paths graph.
  15. Parameters
  16. ----------
  17. G : NetworkX graph
  18. The graph to be tramsformed.
  19. edge_weight : string
  20. edge attribute corresponding to the edge weight.
  21. Return
  22. ------
  23. S : NetworkX graph
  24. The shortest-paths graph corresponding to G.
  25. Notes
  26. ------
  27. For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, while there exists an edge between all nodes in S which are connected by a walk in G. Every edge in S between two nodes is labeled by the shortest distance between these two nodes.
  28. References
  29. ----------
  30. .. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
  31. """
  32. return floydTransformation(G, edge_weight=edge_weight)
  33. def floydTransformation(G, edge_weight=None):
  34. """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation.
  35. Parameters
  36. ----------
  37. G : NetworkX graph
  38. The graph to be tramsformed.
  39. edge_weight : string
  40. edge attribute corresponding to the edge weight. The default edge weight is bond_type.
  41. Return
  42. ------
  43. S : NetworkX graph
  44. The shortest-paths graph corresponding to G.
  45. References
  46. ----------
  47. .. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
  48. """
  49. spMatrix = nx.floyd_warshall_numpy(G, weight=edge_weight)
  50. S = nx.Graph()
  51. S.add_nodes_from(G.nodes(data=True))
  52. ns = list(G.nodes())
  53. for i in range(0, G.number_of_nodes()):
  54. for j in range(i + 1, G.number_of_nodes()):
  55. if spMatrix[i, j] != np.inf:
  56. S.add_edge(ns[i], ns[j], cost=spMatrix[i, j])
  57. return S
  58. def get_shortest_paths(G, weight, directed):
  59. """Get all shortest paths of a graph.
  60. Parameters
  61. ----------
  62. G : NetworkX graphs
  63. The graphs whose paths are calculated.
  64. weight : string/None
  65. edge attribute used as weight to calculate the shortest path.
  66. directed: boolean
  67. Whether graph is directed.
  68. Return
  69. ------
  70. sp : list of list
  71. List of shortest paths of the graph, where each path is represented by a list of nodes.
  72. """
  73. from itertools import combinations
  74. sp = []
  75. for n1, n2 in combinations(G.nodes(), 2):
  76. try:
  77. spltemp = list(nx.all_shortest_paths(G, n1, n2, weight=weight))
  78. except nx.NetworkXNoPath: # nodes not connected
  79. pass
  80. else:
  81. sp += spltemp
  82. # each edge walk is counted twice, starting from both its extreme nodes.
  83. if not directed:
  84. sp += [sptemp[::-1] for sptemp in spltemp]
  85. # add single nodes as length 0 paths.
  86. sp += [[n] for n in G.nodes()]
  87. return sp
  88. def untotterTransformation(G, node_label, edge_label):
  89. """Transform graph G according to Mahé et al.'s work to filter out tottering patterns of marginalized kernel and tree pattern kernel.
  90. Parameters
  91. ----------
  92. G : NetworkX graph
  93. The graph to be tramsformed.
  94. node_label : string
  95. node attribute used as label. The default node label is 'atom'.
  96. edge_label : string
  97. edge attribute used as label. The default edge label is 'bond_type'.
  98. Return
  99. ------
  100. gt : NetworkX graph
  101. The transformed graph corresponding to G.
  102. References
  103. ----------
  104. .. [1] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and Jean-Philippe Vert. Extensions of marginalized graph kernels. In Proceedings of the twenty-first international conference on Machine learning, page 70. ACM, 2004.
  105. """
  106. # arrange all graphs in a list
  107. G = G.to_directed()
  108. gt = nx.Graph()
  109. gt.graph = G.graph
  110. gt.add_nodes_from(G.nodes(data=True))
  111. for edge in G.edges():
  112. gt.add_node(edge)
  113. gt.nodes[edge].update({node_label: G.nodes[edge[1]][node_label]})
  114. gt.add_edge(edge[0], edge)
  115. gt.edges[edge[0], edge].update({
  116. edge_label:
  117. G[edge[0]][edge[1]][edge_label]
  118. })
  119. for neighbor in G[edge[1]]:
  120. if neighbor != edge[0]:
  121. gt.add_edge(edge, (edge[1], neighbor))
  122. gt.edges[edge, (edge[1], neighbor)].update({
  123. edge_label:
  124. G[edge[1]][neighbor][edge_label]
  125. })
  126. # nx.draw_networkx(gt)
  127. # plt.show()
  128. # relabel nodes using consecutive integers for convenience of kernel calculation.
  129. gt = nx.convert_node_labels_to_integers(
  130. gt, first_label=0, label_attribute='label_orignal')
  131. return gt
  132. def direct_product(G1, G2, node_label, edge_label):
  133. """Return the direct/tensor product of directed graphs G1 and G2.
  134. Parameters
  135. ----------
  136. G1, G2 : NetworkX graph
  137. The original graphs.
  138. node_label : string
  139. node attribute used as label. The default node label is 'atom'.
  140. edge_label : string
  141. edge attribute used as label. The default edge label is 'bond_type'.
  142. Return
  143. ------
  144. gt : NetworkX graph
  145. The direct product graph of G1 and G2.
  146. Notes
  147. -----
  148. This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to the direct product graph.
  149. References
  150. ----------
  151. .. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003.
  152. """
  153. # arrange all graphs in a list
  154. from itertools import product
  155. # G = G.to_directed()
  156. gt = nx.DiGraph()
  157. # add nodes
  158. for u, v in product(G1, G2):
  159. if G1.nodes[u][node_label] == G2.nodes[v][node_label]:
  160. gt.add_node((u, v))
  161. gt.nodes[(u, v)].update({node_label: G1.nodes[u][node_label]})
  162. # add edges, faster for sparse graphs (no so many edges), which is the most case for now.
  163. for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
  164. if (u1, u2) in gt and (
  165. v1, v2
  166. ) in gt and G1.edges[u1, v1][edge_label] == G2.edges[u2,
  167. v2][edge_label]:
  168. gt.add_edge((u1, u2), (v1, v2))
  169. gt.edges[(u1, u2), (v1, v2)].update({
  170. edge_label:
  171. G1.edges[u1, v1][edge_label]
  172. })
  173. # # add edges, faster for dense graphs (a lot of edges, complete graph would be super).
  174. # for u, v in product(gt, gt):
  175. # if (u[0], v[0]) in G1.edges and (
  176. # u[1], v[1]
  177. # ) in G2.edges and G1.edges[u[0],
  178. # v[0]][edge_label] == G2.edges[u[1],
  179. # v[1]][edge_label]:
  180. # gt.add_edge((u[0], u[1]), (v[0], v[1]))
  181. # gt.edges[(u[0], u[1]), (v[0], v[1])].update({
  182. # edge_label:
  183. # G1.edges[u[0], v[0]][edge_label]
  184. # })
  185. # relabel nodes using consecutive integers for convenience of kernel calculation.
  186. # gt = nx.convert_node_labels_to_integers(
  187. # gt, first_label=0, label_attribute='label_orignal')
  188. return gt
  189. def graph_deepcopy(G):
  190. """Deep copy a graph, including deep copy of all nodes, edges and
  191. attributes of the graph, nodes and edges.
  192. Note
  193. ----
  194. It is the same as the NetworkX function graph.copy(), as far as I know.
  195. """
  196. # add graph attributes.
  197. labels = {}
  198. for k, v in G.graph.items():
  199. labels[k] = deepcopy(v)
  200. if G.is_directed():
  201. G_copy = nx.DiGraph(**labels)
  202. else:
  203. G_copy = nx.Graph(**labels)
  204. # add nodes
  205. for nd, attrs in G.nodes(data=True):
  206. labels = {}
  207. for k, v in attrs.items():
  208. labels[k] = deepcopy(v)
  209. G_copy.add_node(nd, **labels)
  210. # add edges.
  211. for nd1, nd2, attrs in G.edges(data=True):
  212. labels = {}
  213. for k, v in attrs.items():
  214. labels[k] = deepcopy(v)
  215. G_copy.add_edge(nd1, nd2, **labels)
  216. return G_copy
  217. def graph_isIdentical(G1, G2):
  218. """Check if two graphs are identical, including: same nodes, edges, node
  219. labels/attributes, edge labels/attributes.
  220. Notes
  221. -----
  222. 1. The type of graphs has to be the same.
  223. 2. Global/Graph attributes are neglected as they may contain names for graphs.
  224. """
  225. # check nodes.
  226. nlist1 = [n for n in G1.nodes(data=True)]
  227. nlist2 = [n for n in G2.nodes(data=True)]
  228. if not nlist1 == nlist2:
  229. return False
  230. # check edges.
  231. elist1 = [n for n in G1.edges(data=True)]
  232. elist2 = [n for n in G2.edges(data=True)]
  233. if not elist1 == elist2:
  234. return False
  235. # check graph attributes.
  236. return True
  237. def get_node_labels(Gn, node_label):
  238. """Get node labels of dataset Gn.
  239. """
  240. nl = set()
  241. for G in Gn:
  242. nl = nl | set(nx.get_node_attributes(G, node_label).values())
  243. return nl
  244. def get_edge_labels(Gn, edge_label):
  245. """Get edge labels of dataset Gn.
  246. """
  247. el = set()
  248. for G in Gn:
  249. el = el | set(nx.get_edge_attributes(G, edge_label).values())
  250. return el
  251. def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}):
  252. if name == 'ShortestPath':
  253. from gklearn.kernels import ShortestPath
  254. graph_kernel = ShortestPath(node_labels=node_labels,
  255. node_attrs=node_attrs,
  256. ds_infos=ds_infos,
  257. **kernel_options)
  258. elif name == 'StructuralSP':
  259. from gklearn.kernels import StructuralSP
  260. graph_kernel = StructuralSP(node_labels=node_labels,
  261. edge_labels=edge_labels,
  262. node_attrs=node_attrs,
  263. edge_attrs=edge_attrs,
  264. ds_infos=ds_infos,
  265. **kernel_options)
  266. elif name == 'PathUpToH':
  267. from gklearn.kernels import PathUpToH
  268. graph_kernel = PathUpToH(node_labels=node_labels,
  269. edge_labels=edge_labels,
  270. ds_infos=ds_infos,
  271. **kernel_options)
  272. elif name == 'Treelet':
  273. from gklearn.kernels import Treelet
  274. graph_kernel = Treelet(node_labels=node_labels,
  275. edge_labels=edge_labels,
  276. ds_infos=ds_infos,
  277. **kernel_options)
  278. elif name == 'WeisfeilerLehman':
  279. from gklearn.kernels import WeisfeilerLehman
  280. graph_kernel = WeisfeilerLehman(node_labels=node_labels,
  281. edge_labels=edge_labels,
  282. ds_infos=ds_infos,
  283. **kernel_options)
  284. else:
  285. raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet", "WeisfeilerLehman".')
  286. return graph_kernel
  287. def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None):
  288. from gklearn.utils import Dataset, split_dataset_by_target
  289. # 1. get dataset.
  290. print('1. getting dataset...')
  291. dataset_all = Dataset()
  292. dataset_all.load_predefined_dataset(ds_name)
  293. if not irrelevant_labels is None:
  294. dataset_all.remove_labels(**irrelevant_labels)
  295. # dataset_all.cut_graphs(range(0, 10))
  296. datasets = split_dataset_by_target(dataset_all)
  297. gram_matrix_unnorm_list = []
  298. run_time_list = []
  299. print('start generating preimage for each class of target...')
  300. for idx, dataset in enumerate(datasets):
  301. target = dataset.targets[0]
  302. print('\ntarget =', target, '\n')
  303. # 2. initialize graph kernel.
  304. print('2. initializing graph kernel and setting parameters...')
  305. graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
  306. node_labels=dataset.node_labels,
  307. edge_labels=dataset.edge_labels,
  308. node_attrs=dataset.node_attrs,
  309. edge_attrs=dataset.edge_attrs,
  310. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  311. kernel_options=kernel_options)
  312. # 3. compute gram matrix.
  313. print('3. computing gram matrix...')
  314. gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options)
  315. gram_matrix_unnorm = graph_kernel.gram_matrix_unnorm
  316. gram_matrix_unnorm_list.append(gram_matrix_unnorm)
  317. run_time_list.append(run_time)
  318. # 4. save results.
  319. print()
  320. print('4. saving results...')
  321. if save_results:
  322. np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list)
  323. print('\ncomplete.')
  324. def find_paths(G, source_node, length):
  325. """Find all paths with a certain length those start from a source node.
  326. A recursive depth first search is applied.
  327. Parameters
  328. ----------
  329. G : NetworkX graphs
  330. The graph in which paths are searched.
  331. source_node : integer
  332. The number of the node from where all paths start.
  333. length : integer
  334. The length of paths.
  335. Return
  336. ------
  337. path : list of list
  338. List of paths retrieved, where each path is represented by a list of nodes.
  339. """
  340. if length == 0:
  341. return [[source_node]]
  342. path = [[source_node] + path for neighbor in G[source_node] \
  343. for path in find_paths(G, neighbor, length - 1) if source_node not in path]
  344. return path
  345. def find_all_paths(G, length, is_directed):
  346. """Find all paths with a certain length in a graph. A recursive depth first
  347. search is applied.
  348. Parameters
  349. ----------
  350. G : NetworkX graphs
  351. The graph in which paths are searched.
  352. length : integer
  353. The length of paths.
  354. Return
  355. ------
  356. path : list of list
  357. List of paths retrieved, where each path is represented by a list of nodes.
  358. """
  359. all_paths = []
  360. for node in G:
  361. all_paths.extend(find_paths(G, node, length))
  362. if not is_directed:
  363. # For each path, two presentations are retrieved from its two extremities.
  364. # Remove one of them.
  365. all_paths_r = [path[::-1] for path in all_paths]
  366. for idx, path in enumerate(all_paths[:-1]):
  367. for path2 in all_paths_r[idx+1::]:
  368. if path == path2:
  369. all_paths[idx] = []
  370. break
  371. all_paths = list(filter(lambda a: a != [], all_paths))
  372. return all_paths
  373. def get_mlti_dim_node_attrs(G, attr_names):
  374. attributes = []
  375. for nd, attrs in G.nodes(data=True):
  376. attributes.append(tuple(attrs[aname] for aname in attr_names))
  377. return attributes
  378. def get_mlti_dim_edge_attrs(G, attr_names):
  379. attributes = []
  380. for ed, attrs in G.edges(data=True):
  381. attributes.append(tuple(attrs[aname] for aname in attr_names))
  382. return attributes

A Python package for graph kernels, graph edit distances and the graph pre-image problem.