"""Shortest-path kernel between graphs.

@author: linlin
@references: Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In
    Data Mining, Fifth IEEE International Conference on 2005 Nov 27
    (pp. 8-pp). IEEE.
"""

import sys
import pathlib
sys.path.insert(0, "../")
from collections import Counter
from tqdm import tqdm
import time

import networkx as nx
import numpy as np

from pygraph.utils.utils import getSPGraph
from pygraph.utils.graphdataset import get_dataset_attributes


def _sp_edge_signatures(sp_graph, node_label, node_labeled):
    """Count the comparable edges of a shortest-path graph by signature.

    Parameters
    ----------
    sp_graph : NetworkX graph
        Graph whose edges carry a 'cost' attribute.
    node_label : string
        Node attribute holding the node label; only read when
        `node_labeled` is true.
    node_labeled : bool
        Whether the end-node labels take part in the signature.

    Return
    ------
    signatures : collections.Counter
        Maps each signature to the number of edges carrying it. The
        signature is the edge cost, paired with the unordered set of the two
        end-node labels when `node_labeled` is true.
    """
    signatures = Counter()
    for u, v, attrs in sp_graph.edges(data=True):
        cost = attrs['cost']
        # Cost of a node to itself equals 0, cost between two disconnected
        # nodes is Inf; neither kind of edge contributes to the kernel.
        if cost == 0 or cost == np.inf:
            continue
        if node_labeled:
            # frozenset makes the label pair order-independent, matching an
            # undirected comparison of the two end nodes.
            labels = frozenset((sp_graph.nodes[u][node_label],
                                sp_graph.nodes[v][node_label]))
            signatures[(cost, labels)] += 1
        else:
            signatures[cost] += 1
    return signatures


def spkernel(*args, node_label='atom', edge_weight=None):
    """Calculate shortest-path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    node_label : string
        Node attribute used as the node label. It is compared only when
        get_dataset_attributes reports the dataset as node-labeled.
    edge_weight : string
        Edge attribute corresponding to the edge weight. It is used only if
        the first edge of the first graph stores an int or float under that
        name; otherwise no weight attribute is passed to getSPGraph.

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the sp kernel between 2
        graphs.
    run_time : float
        Seconds spent building the shortest-path graphs and the kernel
        matrix.
    """
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]
    n_graphs = len(Gn)
    Kmatrix = np.zeros((n_graphs, n_graphs))

    # Only forward the edge-weight attribute when it holds numeric values;
    # the first edge of the first graph is taken as representative.
    weight = None
    if edge_weight is not None and n_graphs > 0:
        weight_values = nx.get_edge_attributes(Gn[0], edge_weight).values()
        sample = next(iter(weight_values), None)
        if isinstance(sample, (int, float)):
            weight = edge_weight

    ds_attrs = get_dataset_attributes(
        Gn, attr_names=['node_labeled'], node_label=node_label)
    node_labeled = ds_attrs['node_labeled']

    start_time = time.time()

    # get shortest path graphs of Gn
    Gn = [
        getSPGraph(G, edge_weight=weight)
        for G in tqdm(Gn, desc='getting sp graphs', file=sys.stdout)
    ]

    # Summarise every graph once, so each pair of graphs is compared by
    # matching signature counts instead of iterating over all edge pairs.
    signatures = [
        _sp_edge_signatures(G, node_label, node_labeled) for G in Gn
    ]

    with tqdm(
            total=n_graphs * (n_graphs + 1) // 2,
            desc='calculating kernels',
            file=sys.stdout) as pbar:
        for i in range(n_graphs):
            sig_i = signatures[i]
            for j in range(i, n_graphs):
                sig_j = signatures[j]
                # Number of edge pairs (e1 in graph i, e2 in graph j) that
                # share a signature; a Counter returns 0 for absent keys.
                kernel = sum(
                    count * sig_j[key] for key, count in sig_i.items())
                Kmatrix[i][j] = kernel
                Kmatrix[j][i] = kernel
                pbar.update(1)

    run_time = time.time() - start_time
    print(
        "--- shortest path kernel matrix of size %d built in %s seconds ---"
        % (len(Gn), run_time))

    return Kmatrix, run_time