Browse Source

Update get_graph_kernel_by_name().

v0.2.x
jajupmochi 4 years ago
parent
commit
cdcccb2e32
1 changed files with 48 additions and 46 deletions
  1. +48
    -46
      gklearn/utils/utils.py

+ 48
- 46
gklearn/utils/utils.py View File

@@ -101,7 +101,7 @@ def get_shortest_paths(G, weight, directed):
# each edge walk is counted twice, starting from both its extreme nodes.
if not directed:
sp += [sptemp[::-1] for sptemp in spltemp]
# add single nodes as length 0 paths.
sp += [[n] for n in G.nodes()]
return sp
@@ -233,7 +233,7 @@ def direct_product_graph(G1, G2, node_labels, edge_labels):
A list of node attributes used as labels.
edge_labels : list
A list of edge attributes used as labels.
Return
------
gt : NetworkX graph
@@ -287,9 +287,9 @@ def direct_product_graph(G1, G2, node_labels, edge_labels):


def graph_deepcopy(G):
"""Deep copy a graph, including deep copy of all nodes, edges and
"""Deep copy a graph, including deep copy of all nodes, edges and
attributes of the graph, nodes and edges.
Note
----
It is the same as the NetworkX function graph.copy(), as far as I know.
@@ -302,28 +302,28 @@ def graph_deepcopy(G):
G_copy = nx.DiGraph(**labels)
else:
G_copy = nx.Graph(**labels)
# add nodes
# add nodes
for nd, attrs in G.nodes(data=True):
labels = {}
for k, v in attrs.items():
labels[k] = deepcopy(v)
G_copy.add_node(nd, **labels)
# add edges.
for nd1, nd2, attrs in G.edges(data=True):
labels = {}
for k, v in attrs.items():
labels[k] = deepcopy(v)
G_copy.add_edge(nd1, nd2, **labels)
return G_copy


def graph_isIdentical(G1, G2):
"""Check if two graphs are identical, including: same nodes, edges, node
labels/attributes, edge labels/attributes.
Notes
-----
1. The type of graphs has to be the same.
@@ -341,7 +341,7 @@ def graph_isIdentical(G1, G2):
if not elist1 == elist2:
return False
# check graph attributes.
return True


@@ -363,7 +363,9 @@ def get_edge_labels(Gn, edge_label):
return el


def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}):
def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}, **kwargs):
if len(kwargs) != 0:
kernel_options = kwargs
if name == 'Marginalized':
from gklearn.kernels import Marginalized
graph_kernel = Marginalized(node_labels=node_labels,
@@ -379,7 +381,7 @@ def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attr
elif name == 'StructuralSP':
from gklearn.kernels import StructuralSP
graph_kernel = StructuralSP(node_labels=node_labels,
edge_labels=edge_labels,
edge_labels=edge_labels,
node_attrs=node_attrs,
edge_attrs=edge_attrs,
ds_infos=ds_infos,
@@ -417,7 +419,7 @@ def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attr
def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None, edge_required=False):
import os
from gklearn.utils import Dataset, split_dataset_by_target
# 1. get dataset.
print('1. getting dataset...')
dataset_all = Dataset()
@@ -427,20 +429,20 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d
dataset_all.remove_labels(**irrelevant_labels)
# dataset_all.cut_graphs(range(0, 10))
datasets = split_dataset_by_target(dataset_all)
gram_matrix_unnorm_list = []
run_time_list = []
print('start generating preimage for each class of target...')
for idx, dataset in enumerate(datasets):
target = dataset.targets[0]
print('\ntarget =', target, '\n')
# 2. initialize graph kernel.
print('2. initializing graph kernel and setting parameters...')
graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
node_labels=dataset.node_labels,
edge_labels=dataset.edge_labels,
edge_labels=dataset.edge_labels,
node_attrs=dataset.node_attrs,
edge_attrs=dataset.edge_attrs,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
@@ -450,24 +452,24 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d
print('3. computing gram matrix...')
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options)
gram_matrix_unnorm = graph_kernel.gram_matrix_unnorm
gram_matrix_unnorm_list.append(gram_matrix_unnorm)
run_time_list.append(run_time)
# 4. save results.
print()
print('4. saving results...')
if save_results:
os.makedirs(dir_save, exist_ok=True)
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list)
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list)

print('\ncomplete.')


print('\ncomplete.')
def find_paths(G, source_node, length):
"""Find all paths with a certain length those start from a source node.
"""Find all paths with a certain length those start from a source node.
A recursive depth first search is applied.
Parameters
----------
G : NetworkX graphs
@@ -476,7 +478,7 @@ def find_paths(G, source_node, length):
The number of the node from where all paths start.
length : integer
The length of paths.
Return
------
path : list of list
@@ -492,14 +494,14 @@ def find_paths(G, source_node, length):
def find_all_paths(G, length, is_directed):
"""Find all paths with a certain length in a graph. A recursive depth first
search is applied.
Parameters
----------
G : NetworkX graphs
The graph in which paths are searched.
length : integer
The length of paths.
Return
------
path : list of list
@@ -508,18 +510,18 @@ def find_all_paths(G, length, is_directed):
all_paths = []
for node in G:
all_paths.extend(find_paths(G, node, length))
if not is_directed:
# For each path, two representations are retrieved from its two extremities.
# For each path, two representations are retrieved from its two extremities.
# Remove one of them.
all_paths_r = [path[::-1] for path in all_paths]
all_paths_r = [path[::-1] for path in all_paths]
for idx, path in enumerate(all_paths[:-1]):
for path2 in all_paths_r[idx+1::]:
if path == path2:
all_paths[idx] = []
break
all_paths = list(filter(lambda a: a != [], all_paths))
return all_paths


@@ -535,8 +537,8 @@ def get_mlti_dim_edge_attrs(G, attr_names):
for ed, attrs in G.edges(data=True):
attributes.append(tuple(attrs[aname] for aname in attr_names))
return attributes
def normalize_gram_matrix(gram_matrix):
diag = gram_matrix.diagonal().copy()
for i in range(len(gram_matrix)):
@@ -544,8 +546,8 @@ def normalize_gram_matrix(gram_matrix):
gram_matrix[i][j] /= np.sqrt(diag[i] * diag[j])
gram_matrix[j][i] = gram_matrix[i][j]
return gram_matrix
def compute_distance_matrix(gram_matrix):
dis_mat = np.empty((len(gram_matrix), len(gram_matrix)))
for i in range(len(gram_matrix)):
@@ -573,9 +575,9 @@ def compute_vertex_kernels(g1, g2, node_kernels, node_labels=[], node_attrs=[]):
g1, g2 : NetworkX graph
The kernels between pairs of vertices in these two graphs are computed.
node_kernels : dict
A dictionary of kernel functions for nodes, including 3 items: 'symb'
for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix'
for both labels. The first 2 functions take two node labels as
A dictionary of kernel functions for nodes, including 3 items: 'symb'
for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix'
for both labels. The first 2 functions take two node labels as
parameters, and the 'mix' function takes 4 parameters, a symbolic and a
non-symbolic label for each the two nodes. Each label is in form of 2-D
dimension array (n_samples, n_features). Each function returns a number
@@ -590,18 +592,18 @@ def compute_vertex_kernels(g1, g2, node_kernels, node_labels=[], node_attrs=[]):
-------
vk_dict : dict
Vertex kernels keyed by vertices.
Notes
-----
This function is used by ``gklearn.kernels.FixedPoint`` and
This function is used by ``gklearn.kernels.FixedPoint`` and
``gklearn.kernels.StructuralSP``. The method is borrowed from FCSP [1].

References
----------
.. [1] Lifan Xu, Wei Wang, M Alvarez, John Cavazos, and Dongping Zhang.
Parallelization of shortest path graph kernels on multi-core cpus and gpus.
Proceedings of the Programmability Issues for Heterogeneous Multicores
(MultiProg), Vienna, Austria, 2014.
.. [1] Lifan Xu, Wei Wang, M Alvarez, John Cavazos, and Dongping Zhang.
Parallelization of shortest path graph kernels on multi-core cpus and gpus.
Proceedings of the Programmability Issues for Heterogeneous Multicores
(MultiProg), Vienna, Austria, 2014.
"""
vk_dict = {} # shortest path matrices dict
if len(node_labels) > 0:


Loading…
Cancel
Save