From 21b99f47305ffbe04b006f736831d6f9d071c10c Mon Sep 17 00:00:00 2001
From: jajupmochi
Date: Tue, 22 Sep 2020 11:55:18 +0200
Subject: [PATCH] Update graph synthesizer.

---
 .../papers/PRL_2020/synthesized_graphs_N.py   |  2 +-
 .../PRL_2020/synthesized_graphs_num_nodes.py  | 54 +++++++++++++
 gklearn/experiments/papers/PRL_2020/utils.py  | 93 ++++++++++++++++++++++
 gklearn/utils/graph_synthesizer.py            | 48 ++++++-----
 4 files changed, 174 insertions(+), 23 deletions(-)
 create mode 100644 gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_nodes.py
 create mode 100644 gklearn/experiments/papers/PRL_2020/utils.py

diff --git a/gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py b/gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py
index c2a1e54..ee0da19 100644
--- a/gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py
+++ b/gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py
@@ -107,7 +107,7 @@ def xp_synthesied_graphs_dataset_size():
 	# Run and save.
 	import pickle
 	import os
-	save_dir = 'outputs/'
+	save_dir = 'outputs/synthesized_graphs_N/'
 	if not os.path.exists(save_dir):
 		os.makedirs(save_dir)
 
diff --git a/gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_nodes.py b/gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_nodes.py
new file mode 100644
index 0000000..24ba722
--- /dev/null
+++ b/gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_nodes.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Sep 21 10:34:26 2020
+
+@author: ljia
+"""
+from utils import Graph_Kernel_List, compute_graph_kernel
+
+
+def generate_graphs(num_nodes):
+	from gklearn.utils.graph_synthesizer import GraphSynthesizer
+	gsyzer = GraphSynthesizer()
+	graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=num_nodes, num_edges=int(num_nodes*2), num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
+	return graphs
+
+
+def xp_synthesied_graphs_num_nodes():
+
+	# Run and save.
+	import pickle
+	import os
+	save_dir = 'outputs/synthesized_graphs_num_nodes/'
+	if not os.path.exists(save_dir):
+		os.makedirs(save_dir)
+
+	run_times = {}
+
+	for kernel_name in Graph_Kernel_List:
+		print()
+		print('Kernel:', kernel_name)
+
+		run_times[kernel_name] = []
+		for num_nodes in [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
+			print()
+			print('Number of nodes:', num_nodes)
+
+			# Generate graphs.
+			graphs = generate_graphs(num_nodes)
+
+			# Compute Gram matrix.
+			gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name)
+			run_times[kernel_name].append(run_time)
+
+			pickle.dump(run_times, open(save_dir + 'run_time.' + kernel_name + '.' + str(num_nodes) + '.pkl', 'wb'))
+
+	# Save all.
+	pickle.dump(run_times, open(save_dir + 'run_times.pkl', 'wb'))
+
+	return
+
+
+if __name__ == '__main__':
+	xp_synthesied_graphs_num_nodes()
diff --git a/gklearn/experiments/papers/PRL_2020/utils.py b/gklearn/experiments/papers/PRL_2020/utils.py
new file mode 100644
index 0000000..1e56a0b
--- /dev/null
+++ b/gklearn/experiments/papers/PRL_2020/utils.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Sep 22 11:33:28 2020
+
+@author: ljia
+"""
+Graph_Kernel_List = ['PathUpToH', 'WLSubtree', 'SylvesterEquation', 'Marginalized', 'ShortestPath', 'Treelet', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'StructuralSP', 'CommonWalk']
+# Graph_Kernel_List = ['CommonWalk', 'Marginalized', 'SylvesterEquation', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'ShortestPath', 'StructuralSP', 'PathUpToH', 'Treelet', 'WLSubtree']
+
+
+def compute_graph_kernel(graphs, kernel_name):
+	import multiprocessing
+
+	if kernel_name == 'CommonWalk':
+		from gklearn.kernels.commonWalkKernel import commonwalkkernel
+		estimator = commonwalkkernel
+		params = {'compute_method': 'geo', 'weight': 0.1}
+
+	elif kernel_name == 'Marginalized':
+		from gklearn.kernels.marginalizedKernel import marginalizedkernel
+		estimator = marginalizedkernel
+		params = {'p_quit': 0.5, 'n_iteration': 5, 'remove_totters': False}
+
+	elif kernel_name == 'SylvesterEquation':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		params = {'compute_method': 'sylvester', 'weight': 0.1}
+
+	elif kernel_name == 'ConjugateGradient':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		params = {'compute_method': 'conjugate', 'weight': 0.1, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
+
+	elif kernel_name == 'FixedPoint':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		params = {'compute_method': 'fp', 'weight': 1e-3, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
+
+	elif kernel_name == 'SpectralDecomposition':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		params = {'compute_method': 'spectral', 'sub_kernel': 'geo', 'weight': 0.1}
+
+	elif kernel_name == 'ShortestPath':
+		from gklearn.kernels.spKernel import spkernel
+		estimator = spkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		params = {'node_kernels': sub_kernel}
+
+	elif kernel_name == 'StructuralSP':
+		from gklearn.kernels.structuralspKernel import structuralspkernel
+		estimator = structuralspkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		params = {'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
+
+	elif kernel_name == 'PathUpToH':
+		from gklearn.kernels.untilHPathKernel import untilhpathkernel
+		estimator = untilhpathkernel
+		params = {'depth': 5, 'k_func': 'MinMax', 'compute_method': 'trie'}
+
+	elif kernel_name == 'Treelet':
+		from gklearn.kernels.treeletKernel import treeletkernel
+		estimator = treeletkernel
+		from gklearn.utils.kernels import polynomialkernel
+		import functools
+		sub_kernel = functools.partial(polynomialkernel, d=4, c=1e+8)
+		params = {'sub_kernel': sub_kernel}
+
+	elif kernel_name == 'WLSubtree':
+		from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
+		estimator = weisfeilerlehmankernel
+		params = {'base_kernel': 'subtree', 'height': 5}
+
+	params['n_jobs'] = multiprocessing.cpu_count()
+	params['verbose'] = True
+	results = estimator(graphs, **params)
+
+	return results[0], results[1]
\ No newline at end of file
diff --git a/gklearn/utils/graph_synthesizer.py b/gklearn/utils/graph_synthesizer.py
index 1fa62a7..0b0821e 100644
--- a/gklearn/utils/graph_synthesizer.py
+++ b/gklearn/utils/graph_synthesizer.py
@@ -17,33 +17,37 @@ class GraphSynthesizer(object):
 		pass
 
 
-	def unified_graphs(self, num_graphs=1000, num_nodes=100, num_edges=196, num_node_labels=0, num_edge_labels=0, seed=None, directed=False):
+	def random_graph(self, num_nodes, num_edges, num_node_labels=0, num_edge_labels=0, seed=None, directed=False, max_num_edges=None, all_edges=None): # @todo: seed and directed are not used yet.
+		g = nx.Graph()
+		if num_node_labels > 0:
+			node_labels = np.random.randint(0, high=num_node_labels, size=num_nodes) # Draw all node labels at once.
+			for i in range(0, num_nodes):
+				g.add_node(str(i), node_label=node_labels[i])
+		else:
+			for i in range(0, num_nodes):
+				g.add_node(str(i))
+
+		if num_edge_labels > 0:
+			edge_labels = np.random.randint(0, high=num_edge_labels, size=num_edges)
+			for i in random.sample(range(0, max_num_edges), num_edges):
+				node1, node2 = all_edges[i]
+				g.add_edge(node1, node2, edge_label=edge_labels[i])
+		else:
+			for i in random.sample(range(0, max_num_edges), num_edges):
+				node1, node2 = all_edges[i]
+				g.add_edge(node1, node2)
+
+		return g
+
+
+	def unified_graphs(self, num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False):
 		max_num_edges = int((num_nodes - 1) * num_nodes / 2)
 		if num_edges > max_num_edges:
 			raise Exception('Too many edges.')
 		all_edges = [(i, j) for i in range(0, num_nodes) for j in range(i + 1, num_nodes)] # @todo: optimize. No directed graphs.
 
 		graphs = []
-		for idx in range(0, num_graphs):
-			g = nx.Graph()
-			if num_node_labels > 0:
-				for i in range(0, num_nodes):
-					node_labels = np.random.randint(0, high=num_node_labels, size=num_nodes)
-					g.add_node(str(i), node_label=node_labels[i])
-			else:
-				for i in range(0, num_nodes):
-					g.add_node(str(i))
-
-			if num_edge_labels > 0:
-				edge_labels = np.random.randint(0, high=num_edge_labels, size=num_edges)
-				for i in random.sample(range(0, max_num_edges), num_edges):
-					node1, node2 = all_edges[i]
-					g.add_edge(node1, node2, edge_label=edge_labels[i])
-			else:
-				for i in random.sample(range(0, max_num_edges), num_edges):
-					node1, node2 = all_edges[i]
-					g.add_edge(node1, node2)
-
-			graphs.append(g)
+		for idx in range(0, num_graphs):
+			graphs.append(self.random_graph(num_nodes, num_edges, num_node_labels=num_node_labels, num_edge_labels=num_edge_labels, seed=seed, directed=directed, max_num_edges=max_num_edges, all_edges=all_edges))
 
 		return graphs
\ No newline at end of file
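
Usage note (illustrative sketch, not part of the patch): the refactoring splits single-graph generation out of unified_graphs() into random_graph(), and the new utils.py centralizes kernel configuration behind compute_graph_kernel(). The snippet below shows how the two pieces fit together; it assumes gklearn is importable and that it runs from gklearn/experiments/papers/PRL_2020/ so that `from utils import ...` resolves.

	#!/usr/bin/env python3
	# Minimal usage sketch for the refactored synthesizer and the new helper.
	# Assumed working directory: gklearn/experiments/papers/PRL_2020/.
	from gklearn.utils.graph_synthesizer import GraphSynthesizer
	from utils import Graph_Kernel_List, compute_graph_kernel

	# Generate 100 unlabeled graphs with 20 nodes and 40 edges each
	# (the defaults of the updated unified_graphs()).
	gsyzer = GraphSynthesizer()
	graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=40,
	                               num_node_labels=0, num_edge_labels=0,
	                               seed=None, directed=False)

	# Compute one Gram matrix and its run time, as the experiment scripts
	# do for every kernel name in Graph_Kernel_List.
	assert 'WLSubtree' in Graph_Kernel_List
	gram_matrix, run_time = compute_graph_kernel(graphs, 'WLSubtree')
	print('Computed', len(graphs), 'x', len(graphs), 'Gram matrix in', run_time, 's')

Note that random_graph() currently ignores its seed and directed arguments (flagged with a # @todo above), so repeated runs produce different graph sets.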