Browse Source

Update graph synthesizer.

v0.2.x
jajupmochi 4 years ago
parent
commit
21b99f4730
4 changed files with 174 additions and 23 deletions
  1. +1
    -1
      gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py
  2. +54
    -0
      gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_nodes.py
  3. +93
    -0
      gklearn/experiments/papers/PRL_2020/utils.py
  4. +26
    -22
      gklearn/utils/graph_synthesizer.py

+ 1
- 1
gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py View File

@@ -107,7 +107,7 @@ def xp_synthesied_graphs_dataset_size():
 	# Run and save.
 	import pickle
 	import os
-	save_dir = 'outputs/'
+	save_dir = 'outputs/synthesized_graphs_N/'
 	if not os.path.exists(save_dir):
 		os.makedirs(save_dir)




+ 54
- 0
gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_nodes.py View File

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 21 10:34:26 2020

@author: ljia
"""
from utils import Graph_Kernel_List, compute_graph_kernel


def generate_graphs(num_nodes):
	"""Synthesize 100 unlabeled, undirected random graphs, each with
	``num_nodes`` nodes and ``2 * num_nodes`` edges.

	Returns the list of generated graphs.
	"""
	from gklearn.utils.graph_synthesizer import GraphSynthesizer
	synthesizer = GraphSynthesizer()
	return synthesizer.unified_graphs(
		num_graphs=100,
		num_nodes=num_nodes,
		num_edges=int(num_nodes * 2),
		num_node_labels=0,
		num_edge_labels=0,
		seed=None,
		directed=False)


def xp_synthesied_graphs_num_nodes():
	"""Benchmark every kernel in ``Graph_Kernel_List`` on synthesized graphs
	of increasing size (10..100 nodes), recording the Gram-matrix run times.

	Results are checkpointed per kernel/size and finally saved together to
	``outputs/synthesized_graphs_num_nodes/run_times.pkl``.
	"""
	import pickle
	import os

	save_dir = 'outputs/synthesized_graphs_num_nodes/'
	os.makedirs(save_dir, exist_ok=True)

	run_times = {}
	for kernel_name in Graph_Kernel_List:
		print()
		print('Kernel:', kernel_name)
		run_times[kernel_name] = []
		for num_nodes in range(10, 101, 10):
			print()
			print('Number of nodes:', num_nodes)
			# Generate graphs of the current size.
			graphs = generate_graphs(num_nodes)

			# Compute the Gram matrix and keep only the timing.
			gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name)
			run_times[kernel_name].append(run_time)
			# Checkpoint after every (kernel, size) pair so partial results
			# survive a crash of the more expensive kernels.
			with open(f'{save_dir}run_time.{kernel_name}.{num_nodes}.pkl', 'wb') as f:
				pickle.dump(run_times, f)
	# Save everything once all kernels finished.
	with open(save_dir + 'run_times.pkl', 'wb') as f:
		pickle.dump(run_times, f)
	return


if __name__ == '__main__':
xp_synthesied_graphs_num_nodes()

+ 93
- 0
gklearn/experiments/papers/PRL_2020/utils.py View File

@@ -0,0 +1,93 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 22 11:33:28 2020

@author: ljia
"""
# Names of the graph kernels exercised by the PRL 2020 experiment scripts.
# Each name is dispatched by string comparison in compute_graph_kernel() below.
Graph_Kernel_List = ['PathUpToH', 'WLSubtree', 'SylvesterEquation', 'Marginalized', 'ShortestPath', 'Treelet', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'StructuralSP', 'CommonWalk']
# Alternative run order, kept commented out for reference.
# Graph_Kernel_List = ['CommonWalk', 'Marginalized', 'SylvesterEquation', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'ShortestPath', 'StructuralSP', 'PathUpToH', 'Treelet', 'WLSubtree']


def _default_mix_sub_kernels():
	"""Build the default sub-kernel dict shared by several kernels: a delta
	kernel for symbolic labels, a Gaussian kernel for non-symbolic labels,
	and their product for mixed labels."""
	import functools
	from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
	mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
	return {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}


def compute_graph_kernel(graphs, kernel_name):
	"""Compute the Gram matrix of ``graphs`` with the kernel named
	``kernel_name`` (one of the entries of ``Graph_Kernel_List``).

	Parameters
	----------
	graphs : list
		Graphs to compare (NetworkX graphs, as produced by the synthesizer).
	kernel_name : str
		Name of the kernel to run.

	Returns
	-------
	tuple
		``(gram_matrix, run_time)`` as reported by the kernel estimator.

	Raises
	------
	ValueError
		If ``kernel_name`` does not match any known kernel.  (Previously an
		unknown name crashed later with a confusing ``NameError``.)
	"""
	import multiprocessing
	if kernel_name == 'CommonWalk':
		from gklearn.kernels.commonWalkKernel import commonwalkkernel
		estimator = commonwalkkernel
		params = {'compute_method': 'geo', 'weight': 0.1}
	elif kernel_name == 'Marginalized':
		from gklearn.kernels.marginalizedKernel import marginalizedkernel
		estimator = marginalizedkernel
		params = {'p_quit': 0.5, 'n_iteration': 5, 'remove_totters': False}
	elif kernel_name == 'SylvesterEquation':
		from gklearn.kernels.randomWalkKernel import randomwalkkernel
		estimator = randomwalkkernel
		params = {'compute_method': 'sylvester', 'weight': 0.1}
	elif kernel_name == 'ConjugateGradient':
		from gklearn.kernels.randomWalkKernel import randomwalkkernel
		estimator = randomwalkkernel
		sub_kernel = _default_mix_sub_kernels()
		params = {'compute_method': 'conjugate', 'weight': 0.1, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
	elif kernel_name == 'FixedPoint':
		from gklearn.kernels.randomWalkKernel import randomwalkkernel
		estimator = randomwalkkernel
		sub_kernel = _default_mix_sub_kernels()
		params = {'compute_method': 'fp', 'weight': 1e-3, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
	elif kernel_name == 'SpectralDecomposition':
		from gklearn.kernels.randomWalkKernel import randomwalkkernel
		estimator = randomwalkkernel
		params = {'compute_method': 'spectral', 'sub_kernel': 'geo', 'weight': 0.1}
	elif kernel_name == 'ShortestPath':
		from gklearn.kernels.spKernel import spkernel
		estimator = spkernel
		sub_kernel = _default_mix_sub_kernels()
		params = {'node_kernels': sub_kernel}
	elif kernel_name == 'StructuralSP':
		from gklearn.kernels.structuralspKernel import structuralspkernel
		estimator = structuralspkernel
		sub_kernel = _default_mix_sub_kernels()
		params = {'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
	elif kernel_name == 'PathUpToH':
		from gklearn.kernels.untilHPathKernel import untilhpathkernel
		estimator = untilhpathkernel
		params = {'depth': 5, 'k_func': 'MinMax', 'compute_method': 'trie'}
	elif kernel_name == 'Treelet':
		from gklearn.kernels.treeletKernel import treeletkernel
		estimator = treeletkernel
		from gklearn.utils.kernels import polynomialkernel
		import functools
		sub_kernel = functools.partial(polynomialkernel, d=4, c=1e+8)
		params = {'sub_kernel': sub_kernel}
	elif kernel_name == 'WLSubtree':
		from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
		estimator = weisfeilerlehmankernel
		params = {'base_kernel': 'subtree', 'height': 5}
	else:
		raise ValueError('Unknown kernel name: ' + kernel_name)
	# Common settings for every estimator: use all cores, print progress.
	params['n_jobs'] = multiprocessing.cpu_count()
	params['verbose'] = True
	results = estimator(graphs, **params)
	return results[0], results[1]

+ 26
- 22
gklearn/utils/graph_synthesizer.py View File

@@ -17,33 +17,37 @@ class GraphSynthesizer(object):
pass pass
def unified_graphs(self, num_graphs=1000, num_nodes=100, num_edges=196, num_node_labels=0, num_edge_labels=0, seed=None, directed=False):
def random_graph(self, num_nodes, num_edges, num_node_labels=0, num_edge_labels=0, seed=None, directed=False, max_num_edges=None, all_edges=None):
	"""Generate one random graph with ``num_nodes`` nodes and ``num_edges``
	edges sampled uniformly from the candidate edge list ``all_edges``.

	Parameters
	----------
	num_nodes, num_edges : int
		Size of the generated graph.
	num_node_labels, num_edge_labels : int
		When > 0, assign each node/edge a random integer label in
		``[0, num_*_labels)`` under the attribute name ``node_label`` /
		``edge_label``.
	seed, directed : optional
		NOTE(review): accepted for interface compatibility but not used in
		this body — graphs are always undirected and unseeded; confirm.
	max_num_edges : int, optional
		Number of candidate edges; computed from ``num_nodes`` when None.
	all_edges : list of (int, int), optional
		Candidate edge list; generated when None (callers pass it in to
		avoid rebuilding it for every graph).

	Returns
	-------
	networkx.Graph
		The generated graph.
	"""
	# Backward-compatible defaults so the method is callable stand-alone.
	if max_num_edges is None:
		max_num_edges = int((num_nodes - 1) * num_nodes / 2)
	if all_edges is None:
		all_edges = [(i, j) for i in range(0, num_nodes) for j in range(i + 1, num_nodes)]

	g = nx.Graph()
	if num_node_labels > 0:
		# Draw all node labels in one vectorized call (hoisted out of the loop).
		node_labels = np.random.randint(0, high=num_node_labels, size=num_nodes)
		for i in range(0, num_nodes):
			g.add_node(str(i), node_label=node_labels[i])
	else:
		for i in range(0, num_nodes):
			g.add_node(str(i))

	sampled = random.sample(range(0, max_num_edges), num_edges)
	if num_edge_labels > 0:
		edge_labels = np.random.randint(0, high=num_edge_labels, size=num_edges)
		# Bug fix: index edge_labels by the enumeration position k, not by
		# the sampled candidate index i — i ranges up to max_num_edges - 1
		# while edge_labels only has num_edges entries, so edge_labels[i]
		# raised IndexError whenever i >= num_edges.
		for k, i in enumerate(sampled):
			node1, node2 = all_edges[i]
			g.add_edge(node1, node2, edge_label=edge_labels[k])
	else:
		for i in sampled:
			node1, node2 = all_edges[i]
			g.add_edge(node1, node2)
	return g
def unified_graphs(self, num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False):
	"""Generate ``num_graphs`` random graphs that all share the same number
	of nodes and edges (and the same label alphabets), by delegating each
	graph to :meth:`random_graph`.

	Raises
	------
	Exception
		If ``num_edges`` exceeds the number of possible undirected edges.
	"""
	max_num_edges = int((num_nodes - 1) * num_nodes / 2)
	if num_edges > max_num_edges:
		raise Exception('Too many edges.')
	# Enumerate every candidate undirected edge (i < j) once, shared by all
	# graphs. @todo: optimize. No directed graphs.
	all_edges = [(i, j) for i in range(0, num_nodes) for j in range(i + 1, num_nodes)]
	return [
		self.random_graph(
			num_nodes, num_edges,
			num_node_labels=num_node_labels,
			num_edge_labels=num_edge_labels,
			seed=seed, directed=directed,
			max_num_edges=max_num_edges,
			all_edges=all_edges)
		for _ in range(0, num_graphs)
	]

Loading…
Cancel
Save