Browse Source

Add graph synthesizer.

v0.2.x
jajupmochi 4 years ago
parent
commit
d8e206c33c
3 changed files with 190 additions and 3 deletions
  1. +138
    -0
      gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py
  2. +49
    -0
      gklearn/utils/graph_synthesizer.py
  3. +3
    -3
      gklearn/utils/parallel.py

+ 138
- 0
gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py View File

@@ -0,0 +1,138 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 21 10:34:26 2020

@author: ljia
"""
# Graph kernels benchmarked in this experiment, in execution order.
Graph_Kernel_List = ['PathUpToH', 'WLSubtree', 'SylvesterEquation', 'Marginalized', 'ShortestPath', 'Treelet', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'CommonWalk']
# Alternative full list (includes 'StructuralSP', which is handled by
# compute_graph_kernel but excluded from the run above):
# Graph_Kernel_List = ['CommonWalk', 'Marginalized', 'SylvesterEquation', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'ShortestPath', 'StructuralSP', 'PathUpToH', 'Treelet', 'WLSubtree']

def generate_graphs():
    """Synthesize the benchmark dataset.

    Returns 1000 unlabeled, undirected random graphs with 20 nodes and
    40 edges each, produced by ``GraphSynthesizer.unified_graphs``.
    """
    from gklearn.utils.graph_synthesizer import GraphSynthesizer
    return GraphSynthesizer().unified_graphs(
        num_graphs=1000, num_nodes=20, num_edges=40,
        num_node_labels=0, num_edge_labels=0, seed=None, directed=False)


def compute_graph_kernel(graphs, kernel_name):
    """Compute the Gram matrix of ``graphs`` with the named graph kernel.

    Parameters
    ----------
    graphs : list of networkx graphs
        The dataset to compute the kernel on.
    kernel_name : str
        One of: 'CommonWalk', 'Marginalized', 'SylvesterEquation',
        'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition',
        'ShortestPath', 'StructuralSP', 'PathUpToH', 'Treelet', 'WLSubtree'.

    Returns
    -------
    gram_matrix, run_time
        The first two elements of the estimator's result tuple.

    Raises
    ------
    ValueError
        If ``kernel_name`` is not one of the supported kernels.
        (Previously an unknown name fell through every branch and raised
        NameError on the unbound ``estimator``/``params``.)
    """
    import multiprocessing

    def _mixed_sub_kernels():
        # Node/edge sub-kernel table shared by several kernels: delta kernel
        # for symbolic labels, Gaussian for non-symbolic, their product for mixed.
        import functools
        from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        return {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}

    if kernel_name == 'CommonWalk':
        from gklearn.kernels.commonWalkKernel import commonwalkkernel
        estimator = commonwalkkernel
        params = {'compute_method': 'geo', 'weight': 0.1}
    elif kernel_name == 'Marginalized':
        from gklearn.kernels.marginalizedKernel import marginalizedkernel
        estimator = marginalizedkernel
        params = {'p_quit': 0.5, 'n_iteration': 5, 'remove_totters': False}
    elif kernel_name == 'SylvesterEquation':
        from gklearn.kernels.randomWalkKernel import randomwalkkernel
        estimator = randomwalkkernel
        params = {'compute_method': 'sylvester', 'weight': 0.1}
    elif kernel_name == 'ConjugateGradient':
        from gklearn.kernels.randomWalkKernel import randomwalkkernel
        estimator = randomwalkkernel
        sub_kernel = _mixed_sub_kernels()
        params = {'compute_method': 'conjugate', 'weight': 0.1, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
    elif kernel_name == 'FixedPoint':
        from gklearn.kernels.randomWalkKernel import randomwalkkernel
        estimator = randomwalkkernel
        sub_kernel = _mixed_sub_kernels()
        params = {'compute_method': 'fp', 'weight': 1e-3, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
    elif kernel_name == 'SpectralDecomposition':
        from gklearn.kernels.randomWalkKernel import randomwalkkernel
        estimator = randomwalkkernel
        params = {'compute_method': 'spectral', 'sub_kernel': 'geo', 'weight': 0.1}
    elif kernel_name == 'ShortestPath':
        from gklearn.kernels.spKernel import spkernel
        estimator = spkernel
        params = {'node_kernels': _mixed_sub_kernels()}
    elif kernel_name == 'StructuralSP':
        from gklearn.kernels.structuralspKernel import structuralspkernel
        estimator = structuralspkernel
        sub_kernel = _mixed_sub_kernels()
        params = {'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
    elif kernel_name == 'PathUpToH':
        from gklearn.kernels.untilHPathKernel import untilhpathkernel
        estimator = untilhpathkernel
        params = {'depth': 5, 'k_func': 'MinMax', 'compute_method': 'trie'}
    elif kernel_name == 'Treelet':
        from gklearn.kernels.treeletKernel import treeletkernel
        estimator = treeletkernel
        from gklearn.utils.kernels import polynomialkernel
        import functools
        # Polynomial sub-kernel on treelet canonical-key count vectors.
        params = {'sub_kernel': functools.partial(polynomialkernel, d=4, c=1e+8)}
    elif kernel_name == 'WLSubtree':
        from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
        estimator = weisfeilerlehmankernel
        params = {'base_kernel': 'subtree', 'height': 5}
    else:
        raise ValueError('Unknown kernel name: ' + str(kernel_name))

    # Common settings: use all cores, print progress.
    params['n_jobs'] = multiprocessing.cpu_count()
    params['verbose'] = True
    results = estimator(graphs, **params)
    return results[0], results[1]


def xp_synthesied_graphs_dataset_size():
    """Benchmark each kernel's run time as the dataset size grows.

    Generates the synthetic dataset once, then times every kernel in
    Graph_Kernel_List on increasing prefixes of it. Run times are
    pickled under 'outputs/' after every size step and once at the end.
    """
    all_graphs = generate_graphs()

    import os
    import pickle
    out_dir = 'outputs/'
    os.makedirs(out_dir, exist_ok=True)

    run_times = {}
    for kernel_name in Graph_Kernel_List:
        print()
        print('Kernel:', kernel_name)
        run_times[kernel_name] = []
        for size in [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]:
            print()
            print('Number of graphs:', size)
            # Copy the prefix so a kernel cannot mutate the shared dataset.
            batch = [g.copy() for g in all_graphs[0:size]]
            _, run_time = compute_graph_kernel(batch, kernel_name)
            run_times[kernel_name].append(run_time)
            # Checkpoint after each step so partial results survive a crash.
            with open(out_dir + 'run_time.' + kernel_name + '.' + str(size) + '.pkl', 'wb') as f:
                pickle.dump(run_times, f)
    # Save the complete result table.
    with open(out_dir + 'run_times.pkl', 'wb') as f:
        pickle.dump(run_times, f)
    return


# Script entry point: run the dataset-size scaling experiment.
if __name__ == '__main__':
    xp_synthesied_graphs_dataset_size()

+ 49
- 0
gklearn/utils/graph_synthesizer.py View File

@@ -0,0 +1,49 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 11 18:10:06 2020

@author: ljia
"""
import numpy as np
import networkx as nx
import random


class GraphSynthesizer(object):
    """Generate synthetic random graphs for benchmarking graph kernels."""

    def __init__(self):
        pass

    def unified_graphs(self, num_graphs=1000, num_nodes=100, num_edges=196, num_node_labels=0, num_edge_labels=0, seed=None, directed=False):
        """Generate ``num_graphs`` random graphs with identical size.

        Each graph has exactly ``num_nodes`` nodes (named '0'..'num_nodes-1')
        and ``num_edges`` distinct edges sampled uniformly from all node
        pairs. Node/edge labels are drawn uniformly from
        ``range(num_node_labels)`` / ``range(num_edge_labels)`` and stored
        under the attribute names 'node_label' / 'edge_label'; a count of 0
        disables that kind of label.

        Parameters
        ----------
        seed : int or None
            Seeds both RNG sources used here (numpy and ``random``) for
            reproducibility. ``None`` (the default) leaves them unseeded,
            matching the previous behavior.
        directed : bool
            Currently ignored; only undirected graphs are produced.
            # TODO: support directed graphs.

        Raises
        ------
        Exception
            If ``num_edges`` exceeds the number of possible node pairs.
        """
        max_num_edges = int((num_nodes - 1) * num_nodes / 2)
        if num_edges > max_num_edges:
            raise Exception('Too many edges.')
        if seed is not None:
            # Both np.random (labels) and random (edge sampling) are used below.
            np.random.seed(seed)
            random.seed(seed)
        # All candidate undirected edges (i < j). @todo: optimize.
        all_edges = [(i, j) for i in range(0, num_nodes) for j in range(i + 1, num_nodes)]

        graphs = []
        for idx in range(0, num_graphs):
            g = nx.Graph()
            if num_node_labels > 0:
                # Draw all node labels in one call (was regenerated per node,
                # costing O(num_nodes**2) random draws).
                node_labels = np.random.randint(0, high=num_node_labels, size=num_nodes)
                for i in range(0, num_nodes):
                    g.add_node(str(i), node_label=node_labels[i])
            else:
                for i in range(0, num_nodes):
                    g.add_node(str(i))

            if num_edge_labels > 0:
                edge_labels = np.random.randint(0, high=num_edge_labels, size=num_edges)
                # Index labels by position in the sample, not by the global
                # edge index i, which ranges up to max_num_edges and made the
                # original raise IndexError whenever a sampled index was
                # >= num_edges.
                for pos, i in enumerate(random.sample(range(0, max_num_edges), num_edges)):
                    node1, node2 = all_edges[i]
                    g.add_edge(node1, node2, edge_label=edge_labels[pos])
            else:
                for i in random.sample(range(0, max_num_edges), num_edges):
                    node1, node2 = all_edges[i]
                    g.add_edge(node1, node2)
            graphs.append(g)

        return graphs

+ 3
- 3
gklearn/utils/parallel.py View File

@@ -12,7 +12,7 @@ import sys


 def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker=None,
                 glbv=None, method=None, n_jobs=None, chunksize=None, itr_desc='',
-                verbose=2):
+                verbose=True):
 	'''
 	'''
 	if method == 'imap_unordered':
@@ -30,7 +30,7 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker
 		else:
 			chunksize = 100
 		for result in (tqdm(pool.imap_unordered(func, itr, chunksize),
-						desc=itr_desc, file=sys.stdout) if verbose == 2 else
+						desc=itr_desc, file=sys.stdout) if verbose else
 						pool.imap_unordered(func, itr, chunksize)):
 			func_assign(result, var_to_assign)
 		pool.close()
@@ -45,7 +45,7 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker
 		else:
 			chunksize = 100
 		for result in (tqdm(pool.imap_unordered(func, itr, chunksize),
-						desc=itr_desc, file=sys.stdout) if verbose == 2 else
+						desc=itr_desc, file=sys.stdout) if verbose else
 						pool.imap_unordered(func, itr, chunksize)):
 			func_assign(result, var_to_assign)
 		pool.close()


Loading…
Cancel
Save