From d8e206c33c2dada22b8fc4bf7bb7532d2d16364d Mon Sep 17 00:00:00 2001
From: jajupmochi
Date: Mon, 21 Sep 2020 17:32:23 +0200
Subject: [PATCH] Add graph synthesizer.

---
 .../papers/PRL_2020/synthesized_graphs_N.py | 138 +++++++++++++++++++++
 gklearn/utils/graph_synthesizer.py          |  49 ++++++++
 gklearn/utils/parallel.py                   |   6 +-
 3 files changed, 190 insertions(+), 3 deletions(-)
 create mode 100644 gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py
 create mode 100644 gklearn/utils/graph_synthesizer.py

diff --git a/gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py b/gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py
new file mode 100644
index 0000000..c2a1e54
--- /dev/null
+++ b/gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Sep 21 10:34:26 2020
+
+@author: ljia
+"""
+Graph_Kernel_List = ['PathUpToH', 'WLSubtree', 'SylvesterEquation', 'Marginalized', 'ShortestPath', 'Treelet', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'CommonWalk']
+# Graph_Kernel_List = ['CommonWalk', 'Marginalized', 'SylvesterEquation', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'ShortestPath', 'StructuralSP', 'PathUpToH', 'Treelet', 'WLSubtree']
+
+
+def generate_graphs():
+	from gklearn.utils.graph_synthesizer import GraphSynthesizer
+	gsyzer = GraphSynthesizer()
+	graphs = gsyzer.unified_graphs(num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
+	return graphs
+
+
+def compute_graph_kernel(graphs, kernel_name):
+	import multiprocessing
+
+	if kernel_name == 'CommonWalk':
+		from gklearn.kernels.commonWalkKernel import commonwalkkernel
+		estimator = commonwalkkernel
+		params = {'compute_method': 'geo', 'weight': 0.1}
+
+	elif kernel_name == 'Marginalized':
+		from gklearn.kernels.marginalizedKernel import marginalizedkernel
+		estimator = marginalizedkernel
+		params = {'p_quit': 0.5, 'n_iteration': 5, 'remove_totters': False}
+
+	elif kernel_name == 'SylvesterEquation':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		params = {'compute_method': 'sylvester', 'weight': 0.1}
+
+	elif kernel_name == 'ConjugateGradient':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		params = {'compute_method': 'conjugate', 'weight': 0.1, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
+
+	elif kernel_name == 'FixedPoint':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		params = {'compute_method': 'fp', 'weight': 1e-3, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
+
+	elif kernel_name == 'SpectralDecomposition':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		params = {'compute_method': 'spectral', 'sub_kernel': 'geo', 'weight': 0.1}
+
+	elif kernel_name == 'ShortestPath':
+		from gklearn.kernels.spKernel import spkernel
+		estimator = spkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		params = {'node_kernels': sub_kernel}
+
+	elif kernel_name == 'StructuralSP':
+		from gklearn.kernels.structuralspKernel import structuralspkernel
+		estimator = structuralspkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		params = {'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
+
+	elif kernel_name == 'PathUpToH':
+		from gklearn.kernels.untilHPathKernel import untilhpathkernel
+		estimator = untilhpathkernel
+		params = {'depth': 5, 'k_func': 'MinMax', 'compute_method': 'trie'}
+
+	elif kernel_name == 'Treelet':
+		from gklearn.kernels.treeletKernel import treeletkernel
+		estimator = treeletkernel
+		from gklearn.utils.kernels import polynomialkernel
+		import functools
+		sub_kernel = functools.partial(polynomialkernel, d=4, c=1e+8)
+		params = {'sub_kernel': sub_kernel}
+
+	elif kernel_name == 'WLSubtree':
+		from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
+		estimator = weisfeilerlehmankernel
+		params = {'base_kernel': 'subtree', 'height': 5}
+
+	params['n_jobs'] = multiprocessing.cpu_count()
+	params['verbose'] = True
+	results = estimator(graphs, **params)
+
+	return results[0], results[1]
+
+
+def xp_synthesized_graphs_dataset_size():
+
+	# Generate graphs.
+	graphs = generate_graphs()
+
+	# Run and save.
+	import pickle
+	import os
+	save_dir = 'outputs/'
+	if not os.path.exists(save_dir):
+		os.makedirs(save_dir)
+
+	run_times = {}
+
+	for kernel_name in Graph_Kernel_List:
+		print()
+		print('Kernel:', kernel_name)
+
+		run_times[kernel_name] = []
+		for num_graphs in [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]:
+			print()
+			print('Number of graphs:', num_graphs)
+
+			sub_graphs = [g.copy() for g in graphs[0:num_graphs]]
+			gram_matrix, run_time = compute_graph_kernel(sub_graphs, kernel_name)
+			run_times[kernel_name].append(run_time)
+
+			pickle.dump(run_times, open(save_dir + 'run_time.' + kernel_name + '.' + str(num_graphs) + '.pkl', 'wb'))
+
+	# Save all.
+	pickle.dump(run_times, open(save_dir + 'run_times.pkl', 'wb'))
+
+	return
+
+
+if __name__ == '__main__':
+	xp_synthesized_graphs_dataset_size()
\ No newline at end of file
diff --git a/gklearn/utils/graph_synthesizer.py b/gklearn/utils/graph_synthesizer.py
new file mode 100644
index 0000000..1fa62a7
--- /dev/null
+++ b/gklearn/utils/graph_synthesizer.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 11 18:10:06 2020
+
+@author: ljia
+"""
+import numpy as np
+import networkx as nx
+import random
+
+
+class GraphSynthesizer(object):
+
+
+	def __init__(self):
+		pass
+
+
+	def unified_graphs(self, num_graphs=1000, num_nodes=100, num_edges=196, num_node_labels=0, num_edge_labels=0, seed=None, directed=False):
+		max_num_edges = int((num_nodes - 1) * num_nodes / 2)
+		if num_edges > max_num_edges:
+			raise ValueError('Too many edges for the given number of nodes.')
+		all_edges = [(i, j) for i in range(0, num_nodes) for j in range(i + 1, num_nodes)]  # @todo: optimize; directed graphs are not supported yet and the seed argument is unused.
+
+		graphs = []
+		for idx in range(0, num_graphs):
+			g = nx.Graph()
+			if num_node_labels > 0:
+				node_labels = np.random.randint(0, high=num_node_labels, size=num_nodes)
+				for i in range(0, num_nodes):
+					g.add_node(str(i), node_label=node_labels[i])
+			else:
+				for i in range(0, num_nodes):
+					g.add_node(str(i))
+
+			if num_edge_labels > 0:
+				edge_labels = np.random.randint(0, high=num_edge_labels, size=num_edges)
+				for k, i in enumerate(random.sample(range(0, max_num_edges), num_edges)):
+					node1, node2 = all_edges[i]
+					g.add_edge(node1, node2, edge_label=edge_labels[k])
+			else:
+				for i in random.sample(range(0, max_num_edges), num_edges):
+					node1, node2 = all_edges[i]
+					g.add_edge(node1, node2)
+
+			graphs.append(g)
+
+		return graphs
\ No newline at end of file
diff --git a/gklearn/utils/parallel.py b/gklearn/utils/parallel.py
index 4c29522..e6edb70 100644
--- a/gklearn/utils/parallel.py
+++ b/gklearn/utils/parallel.py
@@ -12,7 +12,7 @@ import sys
 def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None,
 				init_worker=None, glbv=None, method=None, n_jobs=None, chunksize=None, itr_desc='',
-				verbose=2):
+				verbose=True):
 	''' '''
 	if method == 'imap_unordered':
@@ -30,7 +30,7 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker
 			else:
 				chunksize = 100
 		for result in (tqdm(pool.imap_unordered(func, itr, chunksize),
-						desc=itr_desc, file=sys.stdout) if verbose == 2 else
+						desc=itr_desc, file=sys.stdout) if verbose else
 				pool.imap_unordered(func, itr, chunksize)):
 			func_assign(result, var_to_assign)
 		pool.close()
@@ -45,7 +45,7 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker
 			else:
 				chunksize = 100
 		for result in (tqdm(pool.imap_unordered(func, itr, chunksize),
-						desc=itr_desc, file=sys.stdout) if verbose == 2 else
+						desc=itr_desc, file=sys.stdout) if verbose else
 				pool.imap_unordered(func, itr, chunksize)):
 			func_assign(result, var_to_assign)
 		pool.close()
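
A minimal usage sketch for the GraphSynthesizer introduced above, assuming the patch is applied and gklearn is importable. The node and edge counts mirror generate_graphs() in synthesized_graphs_N.py; num_graphs is reduced to 100 here purely for a quick check, and the timing uses only the standard library:

	#!/usr/bin/env python3
	import time
	from gklearn.utils.graph_synthesizer import GraphSynthesizer

	# Generate 100 unlabeled, undirected graphs with 20 nodes and 40 edges each.
	gsyzer = GraphSynthesizer()
	start = time.time()
	graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=40,
	                               num_node_labels=0, num_edge_labels=0,
	                               seed=None, directed=False)
	print('Generated %d graphs in %.3fs.' % (len(graphs), time.time() - start))
	print('First graph: %d nodes, %d edges.'
	      % (graphs[0].number_of_nodes(), graphs[0].number_of_edges()))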