
accuracy_diff_entropy.py 6.4 kB

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 5 16:08:33 2020
@author: ljia

This script computes the classification accuracy of each graph kernel on
datasets with different entropies of degree distribution.
"""
from utils import Graph_Kernel_List, cross_validate
import numpy as np
import logging

num_nodes = 40
half_num_graphs = 100


def generate_graphs():
    # from gklearn.utils.graph_synthesizer import GraphSynthesizer
    # gsyzer = GraphSynthesizer()
    # graphs = gsyzer.unified_graphs(num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
    # return graphs
    import networkx as nx

    degrees11 = [5] * num_nodes
    # degrees12 = [2] * num_nodes
    degrees12 = [5] * num_nodes
    degrees21 = list(range(1, 11)) * 6
    # degrees22 = [5 * i for i in list(range(1, 11)) * 6]
    degrees22 = list(range(1, 11)) * 6

    # method 1
    graphs11 = [nx.configuration_model(degrees11, create_using=nx.Graph) for i in range(half_num_graphs)]
    graphs12 = [nx.configuration_model(degrees12, create_using=nx.Graph) for i in range(half_num_graphs)]
    # method 2: can easily generate isomorphic graphs.
    # graphs11 = [nx.random_regular_graph(2, num_nodes, seed=None) for i in range(half_num_graphs)]
    # graphs12 = [nx.random_regular_graph(10, num_nodes, seed=None) for i in range(half_num_graphs)]

    # Add node labels.
    for g in graphs11:
        for n in g.nodes():
            g.nodes[n]['atom'] = 0
    for g in graphs12:
        for n in g.nodes():
            g.nodes[n]['atom'] = 1

    graphs1 = graphs11 + graphs12

    # method 1: the entropy of the two classes is not the same.
    graphs21 = [nx.configuration_model(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]
    graphs22 = [nx.configuration_model(degrees22, create_using=nx.Graph) for i in range(half_num_graphs)]
    # # method 2: too slow, and may fail.
    # graphs21 = [nx.random_degree_sequence_graph(degrees21, seed=None, tries=100) for i in range(half_num_graphs)]
    # graphs22 = [nx.random_degree_sequence_graph(degrees22, seed=None, tries=100) for i in range(half_num_graphs)]
    # # method 3: no randomness.
    # graphs21 = [nx.havel_hakimi_graph(degrees21, create_using=None) for i in range(half_num_graphs)]
    # graphs22 = [nx.havel_hakimi_graph(degrees22, create_using=None) for i in range(half_num_graphs)]
    # # method 4:
    # graphs21 = [nx.configuration_model(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]
    # graphs22 = [nx.degree_sequence_tree(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]
    # # method 5: the entropy of the two classes is not the same.
    # graphs21 = [nx.expected_degree_graph(degrees21, seed=None, selfloops=False) for i in range(half_num_graphs)]
    # graphs22 = [nx.expected_degree_graph(degrees22, seed=None, selfloops=False) for i in range(half_num_graphs)]
    # # method 6: seems there is no randomness.
    # graphs21 = [nx.random_powerlaw_tree(num_nodes, gamma=3, seed=None, tries=10000) for i in range(half_num_graphs)]
    # graphs22 = [nx.random_powerlaw_tree(num_nodes, gamma=3, seed=None, tries=10000) for i in range(half_num_graphs)]

    # Add node labels.
    for g in graphs21:
        for n in g.nodes():
            g.nodes[n]['atom'] = 0
    for g in graphs22:
        for n in g.nodes():
            g.nodes[n]['atom'] = 1

    graphs2 = graphs21 + graphs22

    # # Check for isomorphism.
    # iso_mat1 = np.zeros((len(graphs1), len(graphs1)))
    # num1 = 0
    # num2 = 0
    # for i in range(len(graphs1)):
    #     for j in range(i + 1, len(graphs1)):
    #         if nx.is_isomorphic(graphs1[i], graphs1[j]):
    #             iso_mat1[i, j] = 1
    #             iso_mat1[j, i] = 1
    #             num1 += 1
    #             print('iso:', num1, ':', i, ',', j)
    #         else:
    #             num2 += 1
    #             print('not iso:', num2, ':', i, ',', j)
    #
    # iso_mat2 = np.zeros((len(graphs2), len(graphs2)))
    # num1 = 0
    # num2 = 0
    # for i in range(len(graphs2)):
    #     for j in range(i + 1, len(graphs2)):
    #         if nx.is_isomorphic(graphs2[i], graphs2[j]):
    #             iso_mat2[i, j] = 1
    #             iso_mat2[j, i] = 1
    #             num1 += 1
    #             print('iso:', num1, ':', i, ',', j)
    #         else:
    #             num2 += 1
    #             print('not iso:', num2, ':', i, ',', j)

    return graphs1, graphs2


def get_infos(graph):
    from gklearn.utils import Dataset
    ds = Dataset()
    ds.load_graphs(graph)
    infos = ds.get_dataset_infos(keys=['all_degree_entropy', 'ave_node_degree'])
    infos['ave_degree_entropy'] = np.mean(infos['all_degree_entropy'])
    print(infos['ave_degree_entropy'], ',', infos['ave_node_degree'])
    return infos


def xp_accuracy_diff_entropy():
    # Generate graphs.
    graphs1, graphs2 = generate_graphs()

    # Compute the entropy of the degree distribution of the generated graphs.
    info11 = get_infos(graphs1[0:half_num_graphs])
    info12 = get_infos(graphs1[half_num_graphs:])
    info21 = get_infos(graphs2[0:half_num_graphs])
    info22 = get_infos(graphs2[half_num_graphs:])

    # Run and save.
    import pickle
    import os
    save_dir = 'outputs/accuracy_diff_entropy/'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    accuracies = {}
    confidences = {}
    for kernel_name in Graph_Kernel_List:
        print()
        print('Kernel:', kernel_name)

        accuracies[kernel_name] = []
        confidences[kernel_name] = []
        for set_i, graphs in enumerate([graphs1, graphs2]):
            print()
            print('Graph set', set_i)

            tmp_graphs = [g.copy() for g in graphs]
            targets = [0] * half_num_graphs + [1] * half_num_graphs

            accuracy = 'error'
            confidence = 'error'
            try:
                accuracy, confidence = cross_validate(tmp_graphs, targets, kernel_name, ds_name=str(set_i), output_dir=save_dir)  # , n_jobs=1)
            except Exception as exp:
                print('An exception occurred when running this experiment:')
                LOG_FILENAME = save_dir + 'error.txt'
                logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
                logging.exception('\n' + kernel_name + ', ' + str(set_i) + ':')
                print(repr(exp))
            accuracies[kernel_name].append(accuracy)
            confidences[kernel_name].append(confidence)

            pickle.dump(accuracy, open(save_dir + 'accuracy.' + kernel_name + '.' + str(set_i) + '.pkl', 'wb'))
            pickle.dump(confidence, open(save_dir + 'confidence.' + kernel_name + '.' + str(set_i) + '.pkl', 'wb'))

    # Save all.
    pickle.dump(accuracies, open(save_dir + 'accuracies.pkl', 'wb'))
    pickle.dump(confidences, open(save_dir + 'confidences.pkl', 'wb'))


if __name__ == '__main__':
    xp_accuracy_diff_entropy()
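
The experiment hinges on the two graph sets differing in degree-distribution entropy: every graph in set 1 is built from a constant degree sequence ([5] * num_nodes), while set 2 mixes degrees 1 through 10 uniformly. The actual 'all_degree_entropy' values come from gklearn's Dataset class; as a rough illustration of the quantity being varied, here is a minimal sketch assuming it is the Shannon entropy of each graph's degree histogram (degree_entropy below is a hypothetical helper, not part of gklearn):

import networkx as nx
import numpy as np

def degree_entropy(g):
    # Hypothetical re-implementation: Shannon entropy (in nats) of the
    # degree histogram of g; gklearn's 'all_degree_entropy' may differ.
    degrees = np.array([d for _, d in g.degree()])
    _, counts = np.unique(degrees, return_counts=True)
    p = counts / counts.sum()
    # Each term p * log(p) is <= 0, so abs() gives the (nonnegative) entropy
    # and also normalizes -0.0 to 0.0 for single-valued distributions.
    return float(abs((p * np.log(p)).sum()))

# Set 1: exactly 5-regular graphs have a single degree value, so entropy 0.
g_regular = nx.random_regular_graph(5, 40)
print(degree_entropy(g_regular))   # 0.0

# Set 2: degrees 1..10 mixed uniformly, so entropy near log(10) ~= 2.30
# (collapsing parallel edges in the configuration model perturbs it slightly).
g_mixed = nx.configuration_model(list(range(1, 11)) * 6, create_using=nx.Graph)
print(degree_entropy(g_mixed))

With this reading, set 1 sits at (or near) zero entropy and set 2 near log(10), which is the contrast across which the classification accuracies are compared.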

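After a run, the aggregated results can be read back from the pickles the script writes (paths taken from the script itself); entries are the string 'error' for any kernel/set pair whose cross-validation raised an exception:

import pickle

save_dir = 'outputs/accuracy_diff_entropy/'
with open(save_dir + 'accuracies.pkl', 'rb') as f:
    accuracies = pickle.load(f)    # {kernel_name: [acc_set0, acc_set1]}
with open(save_dir + 'confidences.pkl', 'rb') as f:
    confidences = pickle.load(f)

for kernel_name, accs in accuracies.items():
    # accs[0]: graph set 0 (regular graphs); accs[1]: graph set 1 (mixed degrees).
    print(kernel_name, accs, confidences[kernel_name])
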
A Python package for graph kernels, graph edit distances and the graph pre-image problem.