#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 5 16:08:33 2020

@author: ljia

This script computes the classification accuracy of each graph kernel on
datasets with different entropy of degree distribution.
"""
from utils import Graph_Kernel_List, cross_validate
import numpy as np
import logging

num_nodes = 40
half_num_graphs = 100


def generate_graphs():
    """Generate two synthetic two-class datasets of random graphs.

    Returns
    -------
    graphs1 : list of networkx.Graph
        Dataset with LOW degree-distribution entropy: both classes are built
        from a constant degree sequence (all nodes of degree 5) and differ
        only by their node label ('atom' = 0 vs 1).
    graphs2 : list of networkx.Graph
        Dataset with HIGHER degree-distribution entropy: both classes are
        built from a mixed degree sequence (degrees 1..10 repeated), again
        differing only by node label.

    Each dataset contains ``2 * half_num_graphs`` graphs, the first half
    labeled class 0 and the second half class 1.
    """
    import networkx as nx

    # Degree sequences. Both classes within a dataset share the same
    # sequence so that only the node labels separate the classes.
    degrees11 = [5] * num_nodes
    degrees12 = [5] * num_nodes
    # NOTE: 60 nodes here (10 degrees * 6), not num_nodes; the sequence sum
    # (330) is even, as required by the configuration model.
    degrees21 = list(range(1, 11)) * 6
    degrees22 = list(range(1, 11)) * 6

    # Configuration model keeps the degree sequence exact while randomizing
    # the wiring. (nx.random_regular_graph was tried as an alternative for
    # dataset 1 but easily generates isomorphic graphs;
    # nx.random_degree_sequence_graph is too slow and may fail;
    # nx.havel_hakimi_graph has no randomness; nx.expected_degree_graph and
    # nx.random_powerlaw_tree do not give the two classes equal entropy.)
    graphs11 = [nx.configuration_model(degrees11, create_using=nx.Graph)
                for _ in range(half_num_graphs)]
    graphs12 = [nx.configuration_model(degrees12, create_using=nx.Graph)
                for _ in range(half_num_graphs)]

    # Node labels are the only feature separating the two classes.
    for g in graphs11:
        for n in g.nodes():
            g.nodes[n]['atom'] = 0
    for g in graphs12:
        for n in g.nodes():
            g.nodes[n]['atom'] = 1

    graphs1 = graphs11 + graphs12

    graphs21 = [nx.configuration_model(degrees21, create_using=nx.Graph)
                for _ in range(half_num_graphs)]
    graphs22 = [nx.configuration_model(degrees22, create_using=nx.Graph)
                for _ in range(half_num_graphs)]

    for g in graphs21:
        for n in g.nodes():
            g.nodes[n]['atom'] = 0
    for g in graphs22:
        for n in g.nodes():
            g.nodes[n]['atom'] = 1

    graphs2 = graphs21 + graphs22

    # To sanity-check for duplicated random graphs, run pairwise
    # nx.is_isomorphic over each dataset (O(n^2), debug only).

    return graphs1, graphs2


def get_infos(graph):
    """Compute and print degree statistics of a list of graphs.

    Parameters
    ----------
    graph : list of networkx.Graph
        The graphs to analyze.

    Returns
    -------
    dict
        Contains 'all_degree_entropy' (per-graph entropy of the degree
        distribution), 'ave_node_degree', and the derived
        'ave_degree_entropy' (mean over all graphs).
    """
    from gklearn.utils import Dataset
    ds = Dataset()
    ds.load_graphs(graph)
    infos = ds.get_dataset_infos(keys=['all_degree_entropy', 'ave_node_degree'])
    infos['ave_degree_entropy'] = np.mean(infos['all_degree_entropy'])
    print(infos['ave_degree_entropy'], ',', infos['ave_node_degree'])
    return infos


def xp_accuracy_diff_entropy():
    """Run the experiment: cross-validate every graph kernel on both datasets.

    For each kernel in ``Graph_Kernel_List`` and each generated dataset,
    runs ``cross_validate`` and pickles the accuracy and confidence to
    ``outputs/accuracy_diff_entropy/``. Failures are logged to
    ``error.txt`` in the same directory and recorded as the string
    ``'error'`` instead of a numeric result.
    """
    import pickle
    import os

    # Generate graphs.
    graphs1, graphs2 = generate_graphs()

    # Print entropy of degree distribution of the generated graphs
    # (side effect of get_infos); values kept for interactive inspection.
    info11 = get_infos(graphs1[0:half_num_graphs])
    info12 = get_infos(graphs1[half_num_graphs:])
    info21 = get_infos(graphs2[0:half_num_graphs])
    info22 = get_infos(graphs2[half_num_graphs:])

    save_dir = 'outputs/accuracy_diff_entropy/'
    # exist_ok avoids the check-then-create race of os.path.exists.
    os.makedirs(save_dir, exist_ok=True)

    accuracies = {}
    confidences = {}

    for kernel_name in Graph_Kernel_List:
        print()
        print('Kernel:', kernel_name)

        accuracies[kernel_name] = []
        confidences[kernel_name] = []
        for set_i, graphs in enumerate([graphs1, graphs2]):
            print()
            print('Graph set', set_i)

            # Copy so a kernel cannot mutate the shared graph lists.
            tmp_graphs = [g.copy() for g in graphs]
            targets = [0] * half_num_graphs + [1] * half_num_graphs

            # Sentinel values recorded if the experiment raises.
            accuracy = 'error'
            confidence = 'error'
            try:
                accuracy, confidence = cross_validate(
                    tmp_graphs, targets, kernel_name,
                    ds_name=str(set_i), output_dir=save_dir)  # , n_jobs=1)
            except Exception as exp:
                # Best-effort experiment loop: log and continue with the
                # next kernel/dataset instead of aborting the whole run.
                print('An exception occurred when running this experiment:')
                LOG_FILENAME = save_dir + 'error.txt'
                logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
                logging.exception('\n' + kernel_name + ', ' + str(set_i) + ':')
                print(repr(exp))
            accuracies[kernel_name].append(accuracy)
            confidences[kernel_name].append(confidence)

            # 'with' guarantees the pickle files are flushed and closed
            # (the previous open(...) calls leaked file handles).
            with open(save_dir + 'accuracy.' + kernel_name + '.'
                      + str(set_i) + '.pkl', 'wb') as f:
                pickle.dump(accuracy, f)
            with open(save_dir + 'confidence.' + kernel_name + '.'
                      + str(set_i) + '.pkl', 'wb') as f:
                pickle.dump(confidence, f)

    # Save all results in aggregate.
    with open(save_dir + 'accuracies.pkl', 'wb') as f:
        pickle.dump(accuracies, f)
    with open(save_dir + 'confidences.pkl', 'wb') as f:
        pickle.dump(confidences, f)

    return


if __name__ == '__main__':
    xp_accuracy_diff_entropy()