- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Sun Dec 23 16:53:57 2018
-
- @author: ljia
- @references: S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and
- Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research,
- 11(Apr):1201–1242, 2010.
- """
-
- import sys
- sys.path.insert(0, "../")
- import time
- from functools import partial
- from tqdm import tqdm
-
- import networkx as nx
- import numpy as np
- from scipy.sparse import identity, kron
- from scipy.sparse.linalg import cg
- from scipy.optimize import fixed_point
-
- from gklearn.utils.graphdataset import get_dataset_attributes
- from gklearn.utils.parallel import parallel_gm
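- # Note: the 'sylvester' method additionally requires the python-control
- # package ('control'); it is imported lazily in _se_do.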
-
- def randomwalkkernel(*args,
- # params for all methods.
- compute_method=None,
- weight=1,
- p=None,
- q=None,
- edge_weight=None,
- # params for the conjugate and fp methods.
- node_kernels=None,
- edge_kernels=None,
- node_label='atom',
- edge_label='bond_type',
- # params for spectral method.
- sub_kernel=None,
- n_jobs=None):
- """Calculate random walk graph kernels.
- Parameters
- ----------
- Gn : List of NetworkX graph
- List of graphs between which the kernels are calculated.
- /
- G1, G2 : NetworkX graphs
- 2 graphs between which the kernel is calculated.
- node_label : string
- node attribute used as label. The default node label is atom.
- edge_label : string
- edge attribute used as label. The default edge label is bond_type.
- h : integer
- Longest length of walks.
- method : string
- Method used to compute the random walk kernel. Available methods are 'sylvester', 'conjugate', 'fp', 'spectral' and 'kron'.
-
- Return
- ------
- Kmatrix : Numpy matrix
- Kernel matrix, each element of which is the path kernel up to d between 2 praphs.
- """
- compute_method = compute_method.lower()
- Gn = args[0] if len(args) == 1 else [args[0], args[1]]
-
- eweight = None
- if edge_weight is None:
- print('\n No edge weight specified. Set all weights to 1.\n')
- else:
- try:
- some_weight = list(
- nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
- if isinstance(some_weight, (float, int)):
- eweight = edge_weight
- else:
- print(
- '\n Edge weight with name %s is not float or integer. Set all weights to 1.\n'
- % edge_weight)
- except IndexError:
- print(
- '\n Edge weight with name "%s" is not found in the edge attributes. Set all weights to 1.\n'
- % edge_weight)
-
- ds_attrs = get_dataset_attributes(
- Gn,
- attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled',
- 'edge_attr_dim', 'is_directed'],
- node_label=node_label,
- edge_label=edge_label)
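- # continuous (non-symbolic) node/edge attributes are ignored by this
- # implementation: force their dimensions to 0 so that only symbolic
- # labels are used in computeVK and computeW.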
- ds_attrs['node_attr_dim'] = 0
- ds_attrs['edge_attr_dim'] = 0
-
- # remove graphs with no edges, as no walk can be found in their structures,
- # so the weight matrix between such a graph and itself might be zero.
- len_gn = len(Gn)
- Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
- idx = [G[0] for G in Gn]
- Gn = [G[1] for G in Gn]
- if len(Gn) != len_gn:
- print('\n %d graphs are removed as they don\'t contain edges.\n' %
- (len_gn - len(Gn)))
-
- start_time = time.time()
-
- # # get vertex and edge concatenated labels for each graph
- # label_list, d = getLabels(Gn, node_label, edge_label, ds_attrs['is_directed'])
- # gmf = filterGramMatrix(A_wave_list[0], label_list[0], ('C', '0', 'O'), ds_attrs['is_directed'])
-
- if compute_method == 'sylvester':
- import warnings
- warnings.warn('All labels are ignored.')
- Kmatrix = _sylvester_equation(Gn, weight, p, q, eweight, n_jobs)
-
- elif compute_method == 'conjugate':
- Kmatrix = _conjugate_gradient(Gn, weight, p, q, ds_attrs,
- node_kernels, edge_kernels,
- node_label, edge_label, eweight, n_jobs)
-
- elif compute_method == 'fp':
- Kmatrix = _fixed_point(Gn, weight, p, q, ds_attrs, node_kernels,
- edge_kernels, node_label, edge_label,
- eweight, n_jobs)
-
- elif compute_method == 'spectral':
- import warnings
- warnings.warn('All labels are ignored. Only works for undirected graphs.')
- Kmatrix = _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs)
-
- elif compute_method == 'kron':
- # @todo: the nearest Kronecker product method is not implemented yet.
- Kmatrix = np.zeros((len(Gn), len(Gn)))
- for i in range(0, len(Gn)):
- for j in range(i, len(Gn)):
- Kmatrix[i][j] = _randomwalkkernel_kron(Gn[i], Gn[j],
- node_label, edge_label)
- Kmatrix[j][i] = Kmatrix[i][j]
- else:
- raise ValueError(
- 'compute method name incorrect. Available methods: "sylvester", "conjugate", "fp", "spectral" and "kron".'
- )
-
- run_time = time.time() - start_time
- print(
- "\n --- kernel matrix of random walk kernel of size %d built in %s seconds ---"
- % (len(Gn), run_time))
-
- return Kmatrix, run_time, idx
-
-
- ###############################################################################
- def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs):
- """Calculate walk graph kernels up to n between 2 graphs using Sylvester method.
-
- Parameters
- ----------
- G1, G2 : NetworkX graph
- Graphs between which the kernel is calculated.
- node_label : string
- node attribute used as label.
- edge_label : string
- edge attribute used as label.
-
- Return
- ------
- kernel : float
- Kernel between 2 graphs.
- """
- Kmatrix = np.zeros((len(Gn), len(Gn)))
-
- if q is None:
- # don't normalize adjacency matrices if q is a uniform vector. Note
- # A_wave_list actually contains the transposes of the adjacency matrices.
- A_wave_list = [
- nx.adjacency_matrix(G, weight=eweight).todense().transpose() for G in tqdm(
- Gn, desc='compute adjacency matrices', file=sys.stdout)
- ]
- # # normalized adjacency matrices
- # A_wave_list = []
- # for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout):
- # A_tilde = nx.adjacency_matrix(G, eweight).todense().transpose()
- # norm = A_tilde.sum(axis=0)
- # norm[norm == 0] = 1
- # A_wave_list.append(A_tilde / norm)
- if p is None: # p is uniform distribution as default.
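- # share the adjacency matrices with the worker processes as a global
- # via init_worker, so they are not pickled again for every graph pair.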
- def init_worker(Awl_toshare):
- global G_Awl
- G_Awl = Awl_toshare
- do_partial = partial(wrapper_se_do, lmda)
- parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
- glbv=(A_wave_list,), n_jobs=n_jobs)
-
- # pbar = tqdm(
- # total=(1 + len(Gn)) * len(Gn) / 2,
- # desc='calculating kernels',
- # file=sys.stdout)
- # for i in range(0, len(Gn)):
- # for j in range(i, len(Gn)):
- # S = lmda * A_wave_list[j]
- # T_t = A_wave_list[i]
- # # use uniform distribution if there is no prior knowledge.
- # nb_pd = len(A_wave_list[i]) * len(A_wave_list[j])
- # p_times_uni = 1 / nb_pd
- # M0 = np.full((len(A_wave_list[j]), len(A_wave_list[i])), p_times_uni)
- # X = dlyap(S, T_t, M0)
- # X = np.reshape(X, (-1, 1), order='F')
- # # use uniform distribution if there is no prior knowledge.
- # q_times = np.full((1, nb_pd), p_times_uni)
- # Kmatrix[i][j] = np.dot(q_times, X)
- # Kmatrix[j][i] = Kmatrix[i][j]
- # pbar.update(1)
-
- return Kmatrix
-
-
- def wrapper_se_do(lmda, itr):
- i = itr[0]
- j = itr[1]
- return i, j, _se_do(G_Awl[i], G_Awl[j], lmda)
-
-
- def _se_do(A_wave1, A_wave2, lmda):
- from control import dlyap
- S = lmda * A_wave2
- T_t = A_wave1
- # use uniform distribution if there is no prior knowledge.
- nb_pd = len(A_wave1) * len(A_wave2)
- p_times_uni = 1 / nb_pd
- M0 = np.full((len(A_wave2), len(A_wave1)), p_times_uni)
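- # dlyap(S, T_t, M0) solves the discrete Sylvester equation
- # S X T_t - X + M0 = 0, i.e. X = lmda * A_wave2 * X * A_wave1 + M0.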
- X = dlyap(S, T_t, M0)
- X = np.reshape(X, (-1, 1), order='F')
- # use uniform distribution if there is no prior knowledge.
- q_times = np.full((1, nb_pd), p_times_uni)
- return np.dot(q_times, X)
-
-
- ###############################################################################
- def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
- node_label, edge_label, eweight, n_jobs):
- """Calculate walk graph kernels up to n between 2 graphs using conjugate method.
-
- Parameters
- ----------
- G1, G2 : NetworkX graph
- Graphs between which the kernel is calculated.
- node_label : string
- node attribute used as label.
- edge_label : string
- edge attribute used as label.
-
- Return
- ------
- kernel : float
- Kernel between 2 graphs.
- """
- Kmatrix = np.zeros((len(Gn), len(Gn)))
-
- # if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \
- # not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] < 1:
- # # this is faster for unlabeled graphs. @todo: why?
- # if q == None:
- # # don't normalize adjacency matrices if q is a uniform vector. Note
- # # A_wave_list actually contains the transposes of the adjacency matrices.
- # A_wave_list = [
- # nx.adjacency_matrix(G, eweight).todense().transpose() for G in
- # tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout)
- # ]
- # if p == None: # p is uniform distribution as default.
- # def init_worker(Awl_toshare):
- # global G_Awl
- # G_Awl = Awl_toshare
- # do_partial = partial(wrapper_cg_unlabled_do, lmda)
- # parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
- # glbv=(A_wave_list,), n_jobs=n_jobs)
- # else:
- # reindex nodes using consecutive integers for convenience of kernel calculation.
- Gn = [nx.convert_node_labels_to_integers(
- g, first_label=0, label_attribute='label_original') for g in tqdm(
- Gn, desc='reindex vertices', file=sys.stdout)]
-
- if p is None and q is None: # p and q are uniform distributions as default.
- def init_worker(gn_toshare):
- global G_gn
- G_gn = gn_toshare
- do_partial = partial(wrapper_cg_labled_do, ds_attrs, node_kernels,
- node_label, edge_kernels, edge_label, lmda)
- parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
- glbv=(Gn,), n_jobs=n_jobs)
-
- # pbar = tqdm(
- # total=(1 + len(Gn)) * len(Gn) / 2,
- # desc='calculating kernels',
- # file=sys.stdout)
- # for i in range(0, len(Gn)):
- # for j in range(i, len(Gn)):
- # result = _cg_labled_do(Gn[i], Gn[j], ds_attrs, node_kernels,
- # node_label, edge_kernels, edge_label, lmda)
- # Kmatrix[i][j] = result
- # Kmatrix[j][i] = Kmatrix[i][j]
- # pbar.update(1)
- return Kmatrix
-
-
- def wrapper_cg_unlabled_do(lmda, itr):
- i = itr[0]
- j = itr[1]
- return i, j, _cg_unlabled_do(G_Awl[i], G_Awl[j], lmda)
-
-
- def _cg_unlabled_do(A_wave1, A_wave2, lmda):
- nb_pd = len(A_wave1) * len(A_wave2)
- p_times_uni = 1 / nb_pd
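- # the weight matrix of the direct product graph is the Kronecker product
- # of the two (transposed) adjacency matrices.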
- w_times = kron(A_wave1, A_wave2).todense()
- A = identity(w_times.shape[0]) - w_times * lmda
- b = np.full((nb_pd, 1), p_times_uni)
- x, _ = cg(A, b)
- # use uniform distribution if there is no prior knowledge.
- q_times = np.full((1, nb_pd), p_times_uni)
- return np.dot(q_times, x)
-
-
- def wrapper_cg_labled_do(ds_attrs, node_kernels, node_label, edge_kernels,
- edge_label, lmda, itr):
- i = itr[0]
- j = itr[1]
- return i, j, _cg_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
- node_label, edge_kernels, edge_label, lmda)
-
-
- def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
- edge_kernels, edge_label, lmda):
- # First, compute kernels between all pairs of nodes using the method
- # borrowed from FCSP. It is faster than directly computing all edge
- # kernels when $d_1d_2>2$, where $d_1$ and $d_2$ are the vertex degrees
- # of the compared graphs, which is the most common case. For very
- # sparse graphs, however, this can be slower.
- vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label)
-
- # Compute weight matrix of the direct product graph.
- w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs,
- edge_kernels, edge_label)
- # use uniform distribution if there is no prior knowledge.
- p_times_uni = 1 / w_dim
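- # solve the linear system (I - lmda * W) x = p with conjugate gradient;
- # the kernel value is then q^T x.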
- A = identity(w_times.shape[0]) - w_times * lmda
- b = np.full((w_dim, 1), p_times_uni)
- x, _ = cg(A, b)
- # use uniform distribution if there is no prior knowledge.
- q_times = np.full((1, w_dim), p_times_uni)
- return np.dot(q_times, x)
-
-
- ###############################################################################
- def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
- node_label, edge_label, eweight, n_jobs):
- """Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method.
-
- Parameters
- ----------
- G1, G2 : NetworkX graph
- Graphs between which the kernel is calculated.
- node_label : string
- node attribute used as label.
- edge_label : string
- edge attribute used as label.
-
- Return
- ------
- kernel : float
- Kernel between 2 graphs.
- """
-
-
- Kmatrix = np.zeros((len(Gn), len(Gn)))
-
- # if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \
- # not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] > 1:
- # # this is faster for unlabeled graphs. @todo: why?
- # if q == None:
- # # don't normalize adjacency matrices if q is a uniform vector. Note
- # # A_wave_list actually contains the transposes of the adjacency matrices.
- # A_wave_list = [
- # nx.adjacency_matrix(G, eweight).todense().transpose() for G in
- # tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout)
- # ]
- # if p == None: # p is uniform distribution as default.
- # pbar = tqdm(
- # total=(1 + len(Gn)) * len(Gn) / 2,
- # desc='calculating kernels',
- # file=sys.stdout)
- # for i in range(0, len(Gn)):
- # for j in range(i, len(Gn)):
- # # use uniform distribution if there is no prior knowledge.
- # nb_pd = len(A_wave_list[i]) * len(A_wave_list[j])
- # p_times_uni = 1 / nb_pd
- # w_times = kron(A_wave_list[i], A_wave_list[j]).todense()
- # p_times = np.full((nb_pd, 1), p_times_uni)
- # x = fixed_point(func_fp, p_times, args=(p_times, lmda, w_times))
- # # use uniform distribution if there is no prior knowledge.
- # q_times = np.full((1, nb_pd), p_times_uni)
- # Kmatrix[i][j] = np.dot(q_times, x)
- # Kmatrix[j][i] = Kmatrix[i][j]
- # pbar.update(1)
- # else:
- # reindex nodes using consecutive integers for convenience of kernel calculation.
- Gn = [nx.convert_node_labels_to_integers(
- g, first_label=0, label_attribute='label_original') for g in tqdm(
- Gn, desc='reindex vertices', file=sys.stdout)]
-
- if p is None and q is None: # p and q are uniform distributions as default.
- def init_worker(gn_toshare):
- global G_gn
- G_gn = gn_toshare
- do_partial = partial(wrapper_fp_labled_do, ds_attrs, node_kernels,
- node_label, edge_kernels, edge_label, lmda)
- parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
- glbv=(Gn,), n_jobs=n_jobs)
- return Kmatrix
-
-
- def wrapper_fp_labled_do(ds_attrs, node_kernels, node_label, edge_kernels,
- edge_label, lmda, itr):
- i = itr[0]
- j = itr[1]
- return i, j, _fp_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
- node_label, edge_kernels, edge_label, lmda)
-
-
- def _fp_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
- edge_kernels, edge_label, lmda):
- # First, compute kernels between all pairs of nodes using the method
- # borrowed from FCSP. It is faster than directly computing all edge
- # kernels when $d_1d_2>2$, where $d_1$ and $d_2$ are the vertex degrees
- # of the compared graphs, which is the most common case. For very
- # sparse graphs, however, this can be slower.
- vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label)
-
- # Compute weight matrix of the direct product graph.
- w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs,
- edge_kernels, edge_label)
- # use uniform distribution if there is no prior knowledge.
- p_times_uni = 1 / w_dim
- p_times = np.full((w_dim, 1), p_times_uni)
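- # iterate x <- p + lmda * W x (func_fp) until convergence; the fixed
- # point is x = (I - lmda * W)^{-1} p, and the kernel value is q^T x.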
- x = fixed_point(func_fp, p_times, args=(p_times, lmda, w_times),
- xtol=1e-06, maxiter=1000)
- # use uniform distribution if there is no prior knowledge.
- q_times = np.full((1, w_dim), p_times_uni)
- return np.dot(q_times, x)
-
-
- def func_fp(x, p_times, lmda, w_times):
- # one fixed-point iteration: x <- p + lmda * W x.
- return p_times + lmda * np.dot(w_times, x)
-
-
- ###############################################################################
- def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs):
- """Calculate walk graph kernels up to n between 2 unlabeled graphs using
- spectral decomposition method. Labels will be ignored.
-
- Parameters
- ----------
- G1, G2 : NetworkX graph
- Graphs between which the kernel is calculated.
- node_label : string
- node attribute used as label.
- edge_label : string
- edge attribute used as label.
-
- Return
- ------
- kernel : float
- Kernel between 2 graphs.
- """
- Kmatrix = np.zeros((len(Gn), len(Gn)))
-
- if q is None:
- # precompute the spectral decomposition of each graph.
- P_list = []
- D_list = []
- for G in tqdm(Gn, desc='spectral decompose', file=sys.stdout):
- # don't normalize adjacency matrices if q is a uniform vector. Note
- # A is actually the transpose of the adjacency matrix.
- A = nx.adjacency_matrix(G, weight=eweight).todense().transpose()
- ew, ev = np.linalg.eig(A)
- D_list.append(ew)
- P_list.append(ev)
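- # for undirected graphs A is symmetric, so the eigenvalues are real and
- # the inverse of P is its transpose, which _sd_do relies on (kr = kl.T).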
- # P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs?
-
- if p is None: # p is uniform distribution as default.
- q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in Gn]
- # q_T_list = [q.T for q in q_list]
- def init_worker(q_T_toshare, P_toshare, D_toshare):
- global G_q_T, G_P, G_D
- G_q_T = q_T_toshare
- G_P = P_toshare
- G_D = D_toshare
- do_partial = partial(wrapper_sd_do, weight, sub_kernel)
- parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
- glbv=(q_T_list, P_list, D_list), n_jobs=n_jobs)
-
-
- # pbar = tqdm(
- # total=(1 + len(Gn)) * len(Gn) / 2,
- # desc='calculating kernels',
- # file=sys.stdout)
- # for i in range(0, len(Gn)):
- # for j in range(i, len(Gn)):
- # result = _sd_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j],
- # D_list[i], D_list[j], weight, sub_kernel)
- # Kmatrix[i][j] = result
- # Kmatrix[j][i] = Kmatrix[i][j]
- # pbar.update(1)
- return Kmatrix
-
-
- def wrapper_sd_do(weight, sub_kernel, itr):
- i = itr[0]
- j = itr[1]
- return i, j, _sd_do(G_q_T[i], G_q_T[j], G_P[i], G_P[j], G_D[i], G_D[j],
- weight, sub_kernel)
-
-
- def _sd_do(q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel):
- # use uniform distribution if there is no prior knowledge.
- kl = kron(np.dot(q_T1, P1), np.dot(q_T2, P2)).todense()
- # @todo: kr is not needed when p = q (kr = kl.T) for undirected graphs.
- # kr = kron(np.dot(P_inv_list[i], q_list[i]), np.dot(P_inv_list[j], q_list[j])).todense()
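- # the middle factor applies the sub-kernel to each pairwise product of
- # eigenvalues: 'exp' gives diag(exp(weight * l1 * l2)) and 'geo' gives
- # diag(1 / (1 - weight * l1 * l2)).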
- if sub_kernel == 'exp':
- D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
- kmiddle = np.diag(np.exp(weight * D_diag))
- elif sub_kernel == 'geo':
- D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
- kmiddle = np.diag(1 / (1 - weight * D_diag))
- else:
- raise ValueError('unknown sub kernel "%s"; use "exp" or "geo".' % sub_kernel)
- return np.dot(np.dot(kl, kmiddle), kl.T)[0, 0]
-
-
- ###############################################################################
- def _randomwalkkernel_kron(G1, G2, node_label, edge_label):
- """Calculate walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method.
-
- Parameters
- ----------
- G1, G2 : NetworkX graph
- Graphs between which the kernel is calculated.
- node_label : string
- node attribute used as label.
- edge_label : string
- edge attribute used as label.
-
- Return
- ------
- kernel : float
- Kernel between 2 graphs.
- """
- # @todo: this method is not implemented yet.
- raise NotImplementedError(
- 'The nearest Kronecker product approximation method is not implemented yet.')
-
-
- ###############################################################################
- def getLabels(Gn, node_label, edge_label, directed):
- """Get symbolic labels of a graph dataset, where vertex labels are dealt
- with by concatenating them to the edge labels of adjacent edges.
- """
- label_list = []
- label_set = set()
- for g in Gn:
- label_g = {}
- for e in g.edges(data=True):
- nl1 = g.nodes[e[0]][node_label]
- nl2 = g.nodes[e[1]][node_label]
- if not directed and nl1 > nl2:
- nl1, nl2 = nl2, nl1
- label = (nl1, e[2][edge_label], nl2)
- label_g[(e[0], e[1])] = label
- label_list.append(label_g)
- label_set = set([l for lg in label_list for l in lg.values()])
- return label_list, len(label_set)
-
-
- def filterGramMatrix(gmt, label_dict, label, directed):
- """Compute (the transpose of) the Gram matrix filtered by a label.
- """
- gmf = np.zeros(gmt.shape)
- for (n1, n2), l in label_dict.items():
- if l == label:
- gmf[n2, n1] = gmt[n2, n1]
- if not directed:
- gmf[n1, n2] = gmt[n1, n2]
- return gmf
-
-
- def computeVK(g1, g2, ds_attrs, node_kernels, node_label):
- '''Compute vertex kernels between all pairs of vertices of two graphs.
- '''
- vk_dict = {} # vertex kernel dict
- if ds_attrs['node_labeled']:
- # node symb and non-symb labeled
- if ds_attrs['node_attr_dim'] > 0:
- kn = node_kernels['mix']
- for n1 in g1.nodes(data=True):
- for n2 in g2.nodes(data=True):
- vk_dict[(n1[0], n2[0])] = kn(
- n1[1][node_label], n2[1][node_label],
- n1[1]['attributes'], n2[1]['attributes'])
- # node symb labeled
- else:
- kn = node_kernels['symb']
- for n1 in g1.nodes(data=True):
- for n2 in g2.nodes(data=True):
- vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
- n2[1][node_label])
- else:
- # node non-symb labeled
- if ds_attrs['node_attr_dim'] > 0:
- kn = node_kernels['nsymb']
- for n1 in g1.nodes(data=True):
- for n2 in g2.nodes(data=True):
- vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'],
- n2[1]['attributes'])
- # node unlabeled
- else:
- pass
- return vk_dict
-
-
- def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label):
- '''Compute weight matrix of the direct product graph.
- '''
- w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
- w_times = np.zeros((w_dim, w_dim))
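- # vertex pair (u1, u2) of the direct product graph maps to row/column
- # u1 * |V(g2)| + u2 of w_times.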
- if vk_dict: # node labeled
- if ds_attrs['is_directed']:
- if ds_attrs['edge_labeled']:
- # edge symb and non-symb labeled
- if ds_attrs['edge_attr_dim'] > 0:
- ke = edge_kernels['mix']
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
- e1[2]['attributes'], e2[2]['attributes'])
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
- * ek_temp * vk_dict[(e1[1], e2[1])]
- # edge symb labeled
- else:
- ke = edge_kernels['symb']
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
- * ek_temp * vk_dict[(e1[1], e2[1])]
- else:
- # edge non-symb labeled
- if ds_attrs['edge_attr_dim'] > 0:
- ke = edge_kernels['nsymb']
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
- * ek_temp * vk_dict[(e1[1], e2[1])]
- # edge unlabeled
- else:
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
- * vk_dict[(e1[1], e2[1])]
- else: # undirected
- if ds_attrs['edge_labeled']:
- # edge symb and non-symb labeled
- if ds_attrs['edge_attr_dim'] > 0:
- ke = edge_kernels['mix']
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
- e1[2]['attributes'], e2[2]['attributes'])
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
- * ek_temp * vk_dict[(e1[1], e2[1])] \
- + vk_dict[(e1[0], e2[1])] \
- * ek_temp * vk_dict[(e1[1], e2[0])]
- w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
- w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
- e1[1] * nx.number_of_nodes(g2) + e2[0])
- w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
- w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
- # edge symb labeled
- else:
- ke = edge_kernels['symb']
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
- * ek_temp * vk_dict[(e1[1], e2[1])] \
- + vk_dict[(e1[0], e2[1])] \
- * ek_temp * vk_dict[(e1[1], e2[0])]
- w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
- w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
- e1[1] * nx.number_of_nodes(g2) + e2[0])
- w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
- w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
- else:
- # edge non-symb labeled
- if ds_attrs['edge_attr_dim'] > 0:
- ke = edge_kernels['nsymb']
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
- * ek_temp * vk_dict[(e1[1], e2[1])] \
- + vk_dict[(e1[0], e2[1])] \
- * ek_temp * vk_dict[(e1[1], e2[0])]
- w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
- w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
- e1[1] * nx.number_of_nodes(g2) + e2[0])
- w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
- w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
- # edge unlabeled
- else:
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
- * vk_dict[(e1[1], e2[1])] \
- + vk_dict[(e1[0], e2[1])] \
- * vk_dict[(e1[1], e2[0])]
- w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
- w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
- e1[1] * nx.number_of_nodes(g2) + e2[0])
- w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
- w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
- else: # node unlabeled
- if ds_attrs['is_directed']:
- if ds_attrs['edge_labeled']:
- # edge symb and non-symb labeled
- if ds_attrs['edge_attr_dim'] > 0:
- ke = edge_kernels['mix']
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
- e1[2]['attributes'], e2[2]['attributes'])
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = ek_temp
- # edge symb labeled
- else:
- ke = edge_kernels['symb']
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = ek_temp
- else:
- # edge non-symb labeled
- if ds_attrs['edge_attr_dim'] > 0:
- ke = edge_kernels['nsymb']
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = ek_temp
- # edge unlabeled
- else:
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = 1
- else: # undirected
- if ds_attrs['edge_labeled']:
- # edge symb and non-symb labeled
- if ds_attrs['edge_attr_dim'] > 0:
- ke = edge_kernels['mix']
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
- e1[2]['attributes'], e2[2]['attributes'])
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = ek_temp
- w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
- w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
- e1[1] * nx.number_of_nodes(g2) + e2[0])
- w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
- w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
- # edge symb labeled
- else:
- ke = edge_kernels['symb']
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = ek_temp
- w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
- w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
- e1[1] * nx.number_of_nodes(g2) + e2[0])
- w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
- w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
- else:
- # edge non-symb labeled
- if ds_attrs['edge_attr_dim'] > 0:
- ke = edge_kernels['nsymb']
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = ek_temp
- w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
- w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
- e1[1] * nx.number_of_nodes(g2) + e2[0])
- w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
- w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
- # edge unlabeled
- else:
- for e1 in g1.edges(data=True):
- for e2 in g2.edges(data=True):
- w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
- e1[1] * nx.number_of_nodes(g2) + e2[1])
- w_times[w_idx] = 1
- w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
- w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
- e1[1] * nx.number_of_nodes(g2) + e2[0])
- w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
- w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
- return w_times, w_dim
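-
-
- ###############################################################################
- if __name__ == '__main__':
-     # A minimal usage sketch, not part of the library API: build two tiny
-     # molecule-like graphs and compute their random walk kernel matrix with
-     # the conjugate gradient method. The Kronecker delta kernel below is an
-     # illustrative stand-in for the kernel functions normally taken from
-     # gklearn.utils.kernels.
-     def deltakernel(x, y):
-         # Kronecker delta kernel between two symbolic labels.
-         return 1 if x == y else 0
-
-     g1 = nx.Graph()
-     g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
-     g1.add_edge(0, 1, bond_type='1')
-     g2 = nx.Graph()
-     g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}),
-                        (2, {'atom': 'O'})])
-     g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])
-
-     # the same delta kernel is reused for all sub-kernel slots; 'mix' and
-     # 'nsymb' are never called here since attribute dimensions are forced
-     # to 0 in randomwalkkernel.
-     sub_kernels = {'symb': deltakernel, 'nsymb': deltakernel, 'mix': deltakernel}
-     Kmatrix, run_time, idx = randomwalkkernel(
-         [g1, g2], compute_method='conjugate', weight=1e-3,
-         node_kernels=sub_kernels, edge_kernels=sub_kernels,
-         node_label='atom', edge_label='bond_type', n_jobs=1)
-     print(Kmatrix)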