diff --git a/pygraph/kernels/marginalizedKernel.py b/pygraph/kernels/marginalizedKernel.py
new file mode 100644
index 0000000..983444f
--- /dev/null
+++ b/pygraph/kernels/marginalizedKernel.py
@@ -0,0 +1,175 @@
+import sys
+import pathlib
+sys.path.insert(0, "../")
+
+import networkx as nx
+import numpy as np
+import time
+
+
+def marginalizedkernel(*args):
+    """Calculate marginalized graph kernels between graphs.
+
+    Parameters
+    ----------
+    Gn : List of NetworkX graph
+        List of graphs between which the kernels are calculated.
+    /
+    G1, G2 : NetworkX graphs
+        2 graphs between which the kernel is calculated.
+    p_quit : float
+        the termination probability in the random walks generating step
+    itr : integer
+        bound of the iteration counter used to calculate R_inf; the update
+        loop runs over range(1, itr), i.e. itr - 1 sweeps are performed
+
+    Return
+    ------
+    Kmatrix/Kernel : Numpy matrix/float
+        Kernel matrix, each element of which is the marginalized kernel
+        between 2 graphs. / Marginalized Kernel between 2 graphs.
+
+    Notes
+    -----
+    Called with 3 arguments (Gn, p_quit, itr) the kernel matrix is built;
+    otherwise the first 4 arguments are read as (G1, G2, p_quit, itr).
+    Every node and every edge is expected to carry a 'label' attribute.
+    Node identifiers may be arbitrary hashable objects, isolated nodes are
+    allowed, and 0 is returned when one of the 2 graphs has no node.
+
+    References
+    ----------
+    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between
+    labeled graphs. In Proceedings of the 20th International Conference on
+    Machine Learning, Washington, DC, United States, 2003.
+    """
+    if len(args) == 3:  # for a list of graphs
+        Gn, p_quit, itr = args
+
+        Kmatrix = np.zeros((len(Gn), len(Gn)))
+
+        start_time = time.time()
+        for i in range(0, len(Gn)):
+            for j in range(i, len(Gn)):
+                # mirror the value instead of computing each pair twice
+                Kmatrix[i][j] = marginalizedkernel(Gn[i], Gn[j], p_quit, itr)
+                Kmatrix[j][i] = Kmatrix[i][j]
+
+        print("\n --- marginalized kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))
+
+        return Kmatrix
+
+    # for only 2 graphs
+    G1 = args[0]
+    G2 = args[1]
+    p_quit = args[2]  # the termination probability in the random walks generating step
+    itr = args[3]  # bound of the iteration counter used to calculate R_inf
+
+    num_nodes_G1 = nx.number_of_nodes(G1)
+    num_nodes_G2 = nx.number_of_nodes(G2)
+    if num_nodes_G1 == 0 or num_nodes_G2 == 0:
+        # the sum of equation (6) is empty, and the uniform initial
+        # distribution below would divide by zero
+        return 0
+
+    # the initial probability distribution in the random walks generating
+    # step (uniform distribution over |G|)
+    p_init_G1 = 1 / num_nodes_G1
+    p_init_G2 = 1 / num_nodes_G2
+
+    q = p_quit * p_quit
+    r1 = q
+
+    # rows of R_inf stand for the nodes of G1, columns for the nodes of G2
+    index_G1, labels_G1 = _index_and_labels(G1)
+    index_G2, labels_G2 = _index_and_labels(G2)
+
+    # initial R_inf
+    # matrix to save all the R_inf for all pairs of nodes
+    R_inf = np.zeros([num_nodes_G1, num_nodes_G2])
+
+    # calculate R_inf with a simple iterative method
+    for _ in range(1, itr):
+        R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
+        R_inf_new.fill(r1)
+
+        # calculate R_inf for each pair of nodes
+        for node1, row in index_G1.items():
+            neighbor_n1 = G1[node1]
+            if len(neighbor_n1) == 0:
+                # no transition leaves an isolated node, its entries stay at
+                # r1; skipping also avoids a division by zero
+                continue
+            # the transition probability distribution in the random walks
+            # generating step (uniform distribution over the vertices
+            # adjacent to the current vertex)
+            p_trans_n1 = (1 - p_quit) / len(neighbor_n1)
+            for node2, col in index_G2.items():
+                neighbor_n2 = G2[node2]
+                if len(neighbor_n2) == 0:
+                    continue
+                p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
+
+                for neighbor1 in neighbor_n1:
+                    for neighbor2 in neighbor_n2:
+                        t = p_trans_n1 * p_trans_n2 * \
+                            deltaKernel(labels_G1[neighbor1] == labels_G2[neighbor2]) * \
+                            deltaKernel(neighbor_n1[neighbor1]['label'] == neighbor_n2[neighbor2]['label'])
+                        # ref [1] equation (8)
+                        R_inf_new[row][col] += t * R_inf[index_G1[neighbor1]][index_G2[neighbor2]]
+
+        R_inf[:] = R_inf_new
+
+    # add elements of R_inf up and calculate kernel
+    kernel = 0
+    for node1, row in index_G1.items():
+        for node2, col in index_G2.items():
+            s = p_init_G1 * p_init_G2 * deltaKernel(labels_G1[node1] == labels_G2[node2])
+            kernel += s * R_inf[row][col]  # ref [1] equation (6)
+
+    return kernel
+
+
+def _index_and_labels(G):
+    """Map every node of G to a matrix position and to its 'label' attribute.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+        Graph whose nodes all carry a 'label' attribute.
+
+    Return
+    ------
+    index : dict
+        Position (0, 1, 2, ...) of each node, in the iteration order of
+        G.nodes(data=True).
+    labels : dict
+        'label' attribute of each node.
+    """
+    index = {}
+    labels = {}
+    for position, (node, attrs) in enumerate(G.nodes(data=True)):
+        index[node] = position
+        labels[node] = attrs['label']
+    return index, labels
+
+
+def deltaKernel(condition):
+    """Return 1 if condition holds, 0 otherwise.
+
+    Parameters
+    ----------
+    condition : Boolean
+        A condition, according to which the kernel is set to 1 or 0.
+
+    Return
+    ------
+    Kernel : integer
+        Delta Kernel.
+
+    References
+    ----------
+    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between
+    labeled graphs. In Proceedings of the 20th International Conference on
+    Machine Learning, Washington, DC, United States, 2003.
+    """
+    return (1 if condition else 0)
\ No newline at end of file
diff --git a/pygraph/kernels/pathKernel.py b/pygraph/kernels/pathKernel.py
new file mode 100644
index 0000000..2b1823a
--- /dev/null
+++ b/pygraph/kernels/pathKernel.py
@@ -0,0 +1,95 @@
+import sys
+import pathlib
+sys.path.insert(0, "../")
+
+
+import networkx as nx
+import numpy as np
+import time
+
+from utils.utils import getSPGraph
+
+
+def spkernel(*args):
+    """Calculate shortest-path kernels between graphs.
+
+    Parameters
+    ----------
+    Gn : List of NetworkX graph
+        List of graphs between which the kernels are calculated.
+    /
+    G1, G2 : NetworkX graphs
+        2 graphs between which the kernel is calculated.
+
+    Return
+    ------
+    Kmatrix/Kernel : Numpy matrix/int
+        Kernel matrix, each element of which is the sp kernel between 2
+        graphs. / SP Kernel between 2 graphs.
+
+    Notes
+    -----
+    Called with 1 argument (Gn) the kernel matrix is built; otherwise the
+    first 2 arguments are read as (G1, G2). In both cases every input graph
+    is first passed through getSPGraph (imported from utils.utils), whose
+    result is expected to have a 'cost' attribute on each edge.
+
+    References
+    ----------
+    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data
+    Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp).
+    IEEE.
+    """
+    if len(args) == 1:  # for a list of graphs
+        Gn = args[0]
+
+        Kmatrix = np.zeros((len(Gn), len(Gn)))
+
+        # get shortest path graphs of Gn
+        Sn = [getSPGraph(G) for G in Gn]
+
+        start_time = time.time()
+        for i in range(0, len(Gn)):
+            for j in range(i, len(Gn)):
+                Kmatrix[i][j] = _count_matching_edges(Sn[i], Sn[j])
+                Kmatrix[j][i] = Kmatrix[i][j]
+
+        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))
+
+        return Kmatrix
+
+    # for only 2 graphs; converted the same way as in the list branch
+    G1 = getSPGraph(args[0])
+    G2 = getSPGraph(args[1])
+
+    start_time = time.time()
+    kernel = _count_matching_edges(G1, G2)
+
+    print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))
+
+    return kernel
+
+
+def _count_matching_edges(S1, S2):
+    """Count the pairs of edges of S1 and S2 that the sp kernel matches.
+
+    Parameters
+    ----------
+    S1, S2 : NetworkX graphs
+        Graphs whose edges all carry a 'cost' attribute.
+
+    Return
+    ------
+    count : int
+        Number of pairs (e1, e2), e1 an edge of S1 and e2 an edge of S2, with
+        the same non-zero cost and the same 2 end nodes in either order.
+    """
+    count = 0
+    for e1 in S1.edges(data=True):
+        cost = e1[2]['cost']
+        if cost == 0:
+            continue
+        for e2 in S2.edges(data=True):
+            if cost == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
+                count += 1
+    return count
\ No newline at end of file