@@ -0,0 +1,186 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" |
|
|
|
|
|
Created on Mon Oct 5 16:08:33 2020 |
|
|
|
|
|
|
|
|
|
|
|
@author: ljia |
|
|
|
|
|
|
|
|
|
|
|
This script compute classification accuracy of each geaph kernel on datasets |
|
|
|
|
|
with different entropy of degree distribution. |
|
|
|
|
|
""" |
|
|
|
|
|
from utils import Graph_Kernel_List, cross_validate
import numpy as np
import logging


num_nodes = 40
half_num_graphs = 100
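# Each class of the synthetic graph sets generated below contains
# half_num_graphs graphs. In the active code, num_nodes only sets the size of
# the graphs in the first set; graphs in the second set take their size from
# their 60-entry degree sequences.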
def generate_graphs():
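	"""Generate two synthetic graph sets for the experiment.

	Each set holds 2 * half_num_graphs graphs split into two classes that
	differ only in their node labels ('atom' = 0 vs. 1):
	  * graphs1 is built from the constant degree sequence [5] * num_nodes,
	    whose degree distribution has zero entropy;
	  * graphs2 is built from the degree sequence 1..10 repeated 6 times,
	    whose degree distribution is more spread out, i.e. has higher entropy.
	"""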
# 	from gklearn.utils.graph_synthesizer import GraphSynthesizer
# 	gsyzer = GraphSynthesizer()
# 	graphs = gsyzer.unified_graphs(num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
# 	return graphs
	import networkx as nx

	degrees11 = [5] * num_nodes
	# degrees12 = [2] * num_nodes
	degrees12 = [5] * num_nodes
	degrees21 = list(range(1, 11)) * 6
	# degrees22 = [5 * i for i in list(range(1, 11)) * 6]
	degrees22 = list(range(1, 11)) * 6

	# method 1
	graphs11 = [nx.configuration_model(degrees11, create_using=nx.Graph) for i in range(half_num_graphs)]
	graphs12 = [nx.configuration_model(degrees12, create_using=nx.Graph) for i in range(half_num_graphs)]

	# method 2: can easily generate isomorphic graphs.
# 	graphs11 = [nx.random_regular_graph(2, num_nodes, seed=None) for i in range(half_num_graphs)]
# 	graphs12 = [nx.random_regular_graph(10, num_nodes, seed=None) for i in range(half_num_graphs)]

	# Add node labels.
	for g in graphs11:
		for n in g.nodes():
			g.nodes[n]['atom'] = 0
	for g in graphs12:
		for n in g.nodes():
			g.nodes[n]['atom'] = 1

	graphs1 = graphs11 + graphs12

	# method 1: the entropy of the two classes is not the same.
	graphs21 = [nx.configuration_model(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]
	graphs22 = [nx.configuration_model(degrees22, create_using=nx.Graph) for i in range(half_num_graphs)]

# 	# method 2: too slow, and may fail.
# 	graphs21 = [nx.random_degree_sequence_graph(degrees21, seed=None, tries=100) for i in range(half_num_graphs)]
# 	graphs22 = [nx.random_degree_sequence_graph(degrees22, seed=None, tries=100) for i in range(half_num_graphs)]

# 	# method 3: no randomness.
# 	graphs21 = [nx.havel_hakimi_graph(degrees21, create_using=None) for i in range(half_num_graphs)]
# 	graphs22 = [nx.havel_hakimi_graph(degrees22, create_using=None) for i in range(half_num_graphs)]

# 	# method 4:
# 	graphs21 = [nx.configuration_model(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]
# 	graphs22 = [nx.degree_sequence_tree(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]

# 	# method 5: the entropy of the two classes is not the same.
# 	graphs21 = [nx.expected_degree_graph(degrees21, seed=None, selfloops=False) for i in range(half_num_graphs)]
# 	graphs22 = [nx.expected_degree_graph(degrees22, seed=None, selfloops=False) for i in range(half_num_graphs)]

# 	# method 6: seems there is no randomness.
# 	graphs21 = [nx.random_powerlaw_tree(num_nodes, gamma=3, seed=None, tries=10000) for i in range(half_num_graphs)]
# 	graphs22 = [nx.random_powerlaw_tree(num_nodes, gamma=3, seed=None, tries=10000) for i in range(half_num_graphs)]

	# Add node labels.
	for g in graphs21:
		for n in g.nodes():
			g.nodes[n]['atom'] = 0
	for g in graphs22:
		for n in g.nodes():
			g.nodes[n]['atom'] = 1

	graphs2 = graphs21 + graphs22

# 	# check for isomorphism.
# 	iso_mat1 = np.zeros((len(graphs1), len(graphs1)))
# 	num1 = 0
# 	num2 = 0
# 	for i in range(len(graphs1)):
# 		for j in range(i + 1, len(graphs1)):
# 			if nx.is_isomorphic(graphs1[i], graphs1[j]):
# 				iso_mat1[i, j] = 1
# 				iso_mat1[j, i] = 1
# 				num1 += 1
# 				print('iso:', num1, ':', i, ',', j)
# 			else:
# 				num2 += 1
# 				print('not iso:', num2, ':', i, ',', j)
#
# 	iso_mat2 = np.zeros((len(graphs2), len(graphs2)))
# 	num1 = 0
# 	num2 = 0
# 	for i in range(len(graphs2)):
# 		for j in range(i + 1, len(graphs2)):
# 			if nx.is_isomorphic(graphs2[i], graphs2[j]):
# 				iso_mat2[i, j] = 1
# 				iso_mat2[j, i] = 1
# 				num1 += 1
# 				print('iso:', num1, ':', i, ',', j)
# 			else:
# 				num2 += 1
# 				print('not iso:', num2, ':', i, ',', j)

	return graphs1, graphs2

def get_infos(graph):
	from gklearn.utils import Dataset
	ds = Dataset()
	ds.load_graphs(graph)
	infos = ds.get_dataset_infos(keys=['all_degree_entropy', 'ave_node_degree'])
	infos['ave_degree_entropy'] = np.mean(infos['all_degree_entropy'])
	print(infos['ave_degree_entropy'], ',', infos['ave_node_degree'])
	return infos
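
# For reference, a minimal sketch of one way a per-graph degree-distribution
# entropy could be computed (Shannon entropy of the empirical degree histogram).
# This helper is only an illustration and is never called; gklearn's
# 'all_degree_entropy' may be defined differently.
def _degree_entropy_sketch(g):
	"""Return the Shannon entropy (in nats) of the degree distribution of `g`."""
	degrees = [d for _, d in g.degree()]
	_, counts = np.unique(degrees, return_counts=True)
	probs = counts / counts.sum()
	return float(-np.sum(probs * np.log(probs)))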
def xp_accuracy_diff_entropy():
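	"""Cross-validate every kernel in Graph_Kernel_List on both graph sets and
	pickle the resulting accuracies and confidences under
	'outputs/accuracy_diff_entropy/'.
	"""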
	# Generate graphs.
	graphs1, graphs2 = generate_graphs()

	# Compute the entropy of the degree distribution of the generated graphs.
	info11 = get_infos(graphs1[0:half_num_graphs])
	info12 = get_infos(graphs1[half_num_graphs:])
	info21 = get_infos(graphs2[0:half_num_graphs])
	info22 = get_infos(graphs2[half_num_graphs:])

	# Run and save.
	import pickle
	import os
	save_dir = 'outputs/accuracy_diff_entropy/'
	if not os.path.exists(save_dir):
		os.makedirs(save_dir)

	accuracies = {}
	confidences = {}

	for kernel_name in Graph_Kernel_List:
		print()
		print('Kernel:', kernel_name)

		accuracies[kernel_name] = []
		confidences[kernel_name] = []
		for set_i, graphs in enumerate([graphs1, graphs2]):
			print()
			print('Graph set', set_i)

			tmp_graphs = [g.copy() for g in graphs]
			targets = [0] * half_num_graphs + [1] * half_num_graphs

			accuracy = 'error'
			confidence = 'error'
			try:
				accuracy, confidence = cross_validate(tmp_graphs, targets, kernel_name, ds_name=str(set_i), output_dir=save_dir) #, n_jobs=1)
			except Exception as exp:
				print('An exception occurred when running this experiment:')
				LOG_FILENAME = save_dir + 'error.txt'
				logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
				logging.exception('\n' + kernel_name + ', ' + str(set_i) + ':')
				print(repr(exp))

			accuracies[kernel_name].append(accuracy)
			confidences[kernel_name].append(confidence)

			pickle.dump(accuracy, open(save_dir + 'accuracy.' + kernel_name + '.' + str(set_i) + '.pkl', 'wb'))
			pickle.dump(confidence, open(save_dir + 'confidence.' + kernel_name + '.' + str(set_i) + '.pkl', 'wb'))

	# Save all.
	pickle.dump(accuracies, open(save_dir + 'accuracies.pkl', 'wb'))
	pickle.dump(confidences, open(save_dir + 'confidences.pkl', 'wb'))

	return

if __name__ == '__main__':
	xp_accuracy_diff_entropy()