"""Mean average path kernel between labelled NetworkX graphs."""
import sys
import pathlib
sys.path.insert(0, "../")

import time
from itertools import combinations

import networkx as nx
import numpy as np

from pygraph.kernels.deltaKernel import deltakernel


def pathkernel(*args, node_label='atom', edge_label='bond_type'):
    """Calculate mean average path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    node_label : string
        Node attribute used as label. The default node label is atom.
    edge_label : string
        Edge attribute used as label. The default edge label is bond_type.

    Returns
    -------
    Kmatrix/kernel : Numpy matrix/float
        Kernel matrix, each element of which is the path kernel between
        2 graphs. / Path kernel between 2 graphs.
    run_time : float
        Wall-clock time, in seconds, spent building the result.

    Notes
    -----
    The edge attribute is used as the shortest-path weight only when the
    first `edge_label` value found on the first graph is an int or a float;
    otherwise shortest paths are unweighted.

    References
    ----------
    [1] Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For
    Measuring Similarity of Shapes. InESANN 2007 Apr 25 (pp. 355-360).
    """
    if len(args) == 1:  # for a list of graphs
        Gn = args[0]
        # An empty list has no graph to probe; fall back to unweighted paths.
        weight = _edge_weight_key(Gn[0], edge_label) if len(Gn) > 0 else None
        Kmatrix = np.zeros((len(Gn), len(Gn)))

        start_time = time.time()
        # Shortest paths depend on a single graph only, so compute them once
        # per graph instead of once per (i, j) pair.
        all_paths = [_shortest_paths(G, weight) for G in Gn]
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                Kmatrix[i][j] = _pathkernel_from_paths(
                    Gn[i], all_paths[i], Gn[j], all_paths[j],
                    node_label, edge_label)
                Kmatrix[j][i] = Kmatrix[i][j]  # the matrix is symmetric
        run_time = time.time() - start_time

        print("\n --- mean average path kernel matrix of size %d built in %s seconds ---" % (len(Gn), run_time))
        return Kmatrix, run_time

    # for only 2 graphs
    weight = _edge_weight_key(args[0], edge_label)

    start_time = time.time()
    kernel = _pathkernel_do(args[0], args[1], node_label, edge_label, weight=weight)
    run_time = time.time() - start_time

    print("\n --- mean average path kernel built in %s seconds ---" % (run_time))
    return kernel, run_time


def _pathkernel_do(G1, G2, node_label='atom', edge_label='bond_type', weight=None):
    """Calculate mean average path kernel between 2 graphs.

    Parameters
    ----------
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    node_label : string
        Node attribute used as label. The default node label is atom.
    edge_label : string
        Edge attribute used as label. The default edge label is bond_type.
    weight : string/None
        Edge attribute used as weight to calculate the shortest path.
        The default is None (unweighted).

    Returns
    -------
    kernel : float
        Path kernel between 2 graphs.
    """
    return _pathkernel_from_paths(
        G1, _shortest_paths(G1, weight),
        G2, _shortest_paths(G2, weight),
        node_label, edge_label)


def _edge_weight_key(G, edge_label):
    """Return `edge_label` if its first value on `G` is an int or float, else None."""
    # Only numeric edge attributes can serve as shortest-path weights.
    # A graph without any such attribute yields None instead of raising.
    sample = next(iter(nx.get_edge_attributes(G, edge_label).values()), None)
    return edge_label if isinstance(sample, (int, float)) else None


def _shortest_paths(G, weight):
    """Return one shortest path (a list of nodes) per unordered node pair of `G`."""
    nodes = list(G.nodes())
    try:
        # Sorted order reproduces the (0, 1), (0, 2), ... pair order used for
        # graphs whose nodes are the integers 0..n-1.
        nodes = sorted(nodes)
    except TypeError:
        pass  # node labels are not mutually comparable: keep insertion order
    # nx.shortest_path raises if a pair is unreachable, so the graph is
    # expected to be connected.
    return [nx.shortest_path(G, source, target, weight=weight)
            for source, target in combinations(nodes, 2)]


def _pathkernel_from_paths(G1, sp1, G2, sp2, node_label, edge_label):
    """Average, over all pairs of paths, the label-equality kernel of the two paths."""
    if not sp1 or not sp2:
        # A graph with fewer than 2 nodes has no paths; avoid dividing by zero.
        return 0.0

    kernel = 0
    for path1 in sp1:
        for path2 in sp2:
            if len(path1) != len(path2):
                continue  # paths of different lengths never match
            # The path kernel is the product of the delta kernels of all
            # corresponding nodes and edges (deltakernel is expected to give
            # 1 for equal labels and 0 otherwise - defined in pygraph).
            kernel_path = deltakernel(
                G1.nodes[path1[0]][node_label] == G2.nodes[path2[0]][node_label])
            for i in range(1, len(path1)):
                if not kernel_path:
                    break  # already 0: further factors cannot change it
                kernel_path *= deltakernel(
                    G1[path1[i - 1]][path1[i]][edge_label]
                    == G2[path2[i - 1]][path2[i]][edge_label]
                ) * deltakernel(
                    G1.nodes[path1[i]][node_label] == G2.nodes[path2[i]][node_label])
            kernel += kernel_path  # add up kernels of all paths

    return kernel / (len(sp1) * len(sp2))  # calculate mean average