
accuracy_diff_entropy.py 6.6 kB

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 5 16:08:33 2020

@author: ljia

This script computes the classification accuracy of each graph kernel on
datasets with different entropies of degree distribution.
"""
from utils import Graph_Kernel_List, cross_validate
import numpy as np
import logging

num_nodes = 40
half_num_graphs = 100

def generate_graphs():
    # from gklearn.utils.graph_synthesizer import GraphSynthesizer
    # gsyzer = GraphSynthesizer()
    # graphs = gsyzer.unified_graphs(num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
    # return graphs
    import networkx as nx

    # Target degree sequences. Both classes in each set share the same
    # sequence: set 1 is constant (degree 5, num_nodes nodes per graph),
    # set 2 mixes degrees 1..10 (60 entries, hence 60-node graphs).
    degrees11 = [5] * num_nodes
    # degrees12 = [2] * num_nodes
    degrees12 = [5] * num_nodes
    degrees21 = list(range(1, 11)) * 6
    # degrees22 = [5 * i for i in list(range(1, 11)) * 6]
    degrees22 = list(range(1, 11)) * 6

    # method 1
    graphs11 = [nx.configuration_model(degrees11, create_using=nx.Graph) for i in range(half_num_graphs)]
    graphs12 = [nx.configuration_model(degrees12, create_using=nx.Graph) for i in range(half_num_graphs)]
    for g in graphs11:
        g.remove_edges_from(nx.selfloop_edges(g))
    for g in graphs12:
        g.remove_edges_from(nx.selfloop_edges(g))

    # method 2: can easily generate isomorphic graphs.
    # graphs11 = [nx.random_regular_graph(2, num_nodes, seed=None) for i in range(half_num_graphs)]
    # graphs12 = [nx.random_regular_graph(10, num_nodes, seed=None) for i in range(half_num_graphs)]

    # Add node labels.
    for g in graphs11:
        for n in g.nodes():
            g.nodes[n]['atom'] = 0
    for g in graphs12:
        for n in g.nodes():
            g.nodes[n]['atom'] = 1

    graphs1 = graphs11 + graphs12

    # method 1: the entropy of the two classes is not the same.
    graphs21 = [nx.configuration_model(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]
    graphs22 = [nx.configuration_model(degrees22, create_using=nx.Graph) for i in range(half_num_graphs)]
    for g in graphs21:
        g.remove_edges_from(nx.selfloop_edges(g))
    for g in graphs22:
        g.remove_edges_from(nx.selfloop_edges(g))

    # # method 2: too slow, and may fail.
    # graphs21 = [nx.random_degree_sequence_graph(degrees21, seed=None, tries=100) for i in range(half_num_graphs)]
    # graphs22 = [nx.random_degree_sequence_graph(degrees22, seed=None, tries=100) for i in range(half_num_graphs)]

    # # method 3: no randomness.
    # graphs21 = [nx.havel_hakimi_graph(degrees21, create_using=None) for i in range(half_num_graphs)]
    # graphs22 = [nx.havel_hakimi_graph(degrees22, create_using=None) for i in range(half_num_graphs)]

    # # method 4:
    # graphs21 = [nx.configuration_model(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]
    # graphs22 = [nx.degree_sequence_tree(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]

    # # method 5: the entropy of the two classes is not the same.
    # graphs21 = [nx.expected_degree_graph(degrees21, seed=None, selfloops=False) for i in range(half_num_graphs)]
    # graphs22 = [nx.expected_degree_graph(degrees22, seed=None, selfloops=False) for i in range(half_num_graphs)]

    # # method 6: seems there is no randomness.
    # graphs21 = [nx.random_powerlaw_tree(num_nodes, gamma=3, seed=None, tries=10000) for i in range(half_num_graphs)]
    # graphs22 = [nx.random_powerlaw_tree(num_nodes, gamma=3, seed=None, tries=10000) for i in range(half_num_graphs)]

    # Add node labels.
    for g in graphs21:
        for n in g.nodes():
            g.nodes[n]['atom'] = 0
    for g in graphs22:
        for n in g.nodes():
            g.nodes[n]['atom'] = 1

    graphs2 = graphs21 + graphs22

    # # check for isomorphism.
    # iso_mat1 = np.zeros((len(graphs1), len(graphs1)))
    # num1 = 0
    # num2 = 0
    # for i in range(len(graphs1)):
    #     for j in range(i + 1, len(graphs1)):
    #         if nx.is_isomorphic(graphs1[i], graphs1[j]):
    #             iso_mat1[i, j] = 1
    #             iso_mat1[j, i] = 1
    #             num1 += 1
    #             print('iso:', num1, ':', i, ',', j)
    #         else:
    #             num2 += 1
    #             print('not iso:', num2, ':', i, ',', j)
    #
    # iso_mat2 = np.zeros((len(graphs2), len(graphs2)))
    # num1 = 0
    # num2 = 0
    # for i in range(len(graphs2)):
    #     for j in range(i + 1, len(graphs2)):
    #         if nx.is_isomorphic(graphs2[i], graphs2[j]):
    #             iso_mat2[i, j] = 1
    #             iso_mat2[j, i] = 1
    #             num1 += 1
    #             print('iso:', num1, ':', i, ',', j)
    #         else:
    #             num2 += 1
    #             print('not iso:', num2, ':', i, ',', j)

    return graphs1, graphs2
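
# Illustrative sketch, not part of the original script: the Shannon entropy of
# the target degree sequences shows why the two graph sets differ. The helper
# name `degree_sequence_entropy` is introduced here for illustration only.
def degree_sequence_entropy(degrees):
    """Shannon entropy (in nats) of the empirical distribution of `degrees`."""
    _, counts = np.unique(degrees, return_counts=True)
    probs = counts / counts.sum()
    return float(-np.sum(probs * np.log(probs)))

# For set 1, [5] * num_nodes gives entropy 0 (a single degree value); for set 2,
# list(range(1, 11)) * 6 gives entropy log(10) ≈ 2.30 (ten equally likely
# values). Realized entropies deviate slightly, since create_using=nx.Graph
# collapses parallel edges and self-loops are removed afterwards.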

def get_infos(graph):
    from gklearn.utils import Dataset
    ds = Dataset()
    ds.load_graphs(graph)
    infos = ds.get_dataset_infos(keys=['all_degree_entropy', 'ave_node_degree'])
    infos['ave_degree_entropy'] = np.mean(infos['all_degree_entropy'])
    print(infos['ave_degree_entropy'], ',', infos['ave_node_degree'])
    return infos
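
# Cross-check sketch, an addition rather than original code: assuming that
# gklearn's 'all_degree_entropy' measures the Shannon entropy of each graph's
# realized degree distribution (an assumption about gklearn's definition), the
# same mean can be computed directly from the generated graphs by reusing the
# `degree_sequence_entropy` sketch above. `mean_degree_entropy` is a
# hypothetical name.
def mean_degree_entropy(graphs):
    return float(np.mean([degree_sequence_entropy([d for _, d in g.degree()])
                          for g in graphs]))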

def xp_accuracy_diff_entropy():
    # Generate graphs.
    graphs1, graphs2 = generate_graphs()

    # Compute the entropy of the degree distributions of the generated graphs.
    info11 = get_infos(graphs1[0:half_num_graphs])
    info12 = get_infos(graphs1[half_num_graphs:])
    info21 = get_infos(graphs2[0:half_num_graphs])
    info22 = get_infos(graphs2[half_num_graphs:])

    # Run and save.
    import pickle
    import os
    save_dir = 'outputs/accuracy_diff_entropy/'
    os.makedirs(save_dir, exist_ok=True)

    accuracies = {}
    confidences = {}
    for kernel_name in Graph_Kernel_List:
        print()
        print('Kernel:', kernel_name)

        accuracies[kernel_name] = []
        confidences[kernel_name] = []
        for set_i, graphs in enumerate([graphs1, graphs2]):
            print()
            print('Graph set', set_i)

            tmp_graphs = [g.copy() for g in graphs]
            targets = [0] * half_num_graphs + [1] * half_num_graphs

            accuracy = 'error'
            confidence = 'error'
            try:
                accuracy, confidence = cross_validate(tmp_graphs, targets, kernel_name, ds_name=str(set_i), output_dir=save_dir) #, n_jobs=1)
            except Exception as exp:
                print('An exception occurred when running this experiment:')
                LOG_FILENAME = save_dir + 'error.txt'
                logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
                logging.exception('\n' + kernel_name + ', ' + str(set_i) + ':')
                print(repr(exp))

            accuracies[kernel_name].append(accuracy)
            confidences[kernel_name].append(confidence)

            pickle.dump(accuracy, open(save_dir + 'accuracy.' + kernel_name + '.' + str(set_i) + '.pkl', 'wb'))
            pickle.dump(confidence, open(save_dir + 'confidence.' + kernel_name + '.' + str(set_i) + '.pkl', 'wb'))

    # Save all.
    pickle.dump(accuracies, open(save_dir + 'accuracies.pkl', 'wb'))
    pickle.dump(confidences, open(save_dir + 'confidences.pkl', 'wb'))

    return
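
# Sketch of the cross-validation contract, an illustration rather than the
# project's code: `cross_validate` comes from a local `utils` module that is
# not shown on this page. A minimal stand-in with the same return shape,
# written against a precomputed Gram matrix instead of (graphs, kernel_name),
# might look like the following; `cross_validate_sketch`, the 10-fold scheme
# and the 1.96-sigma half-width are assumptions made for illustration.
def cross_validate_sketch(gram_matrix, targets, n_splits=10, seed=0):
    from sklearn.model_selection import StratifiedKFold
    from sklearn.svm import SVC
    y = np.asarray(targets)
    K = np.asarray(gram_matrix)
    scores = []
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    for train, test in skf.split(K, y):
        clf = SVC(kernel='precomputed', C=1.0)
        clf.fit(K[np.ix_(train, train)], y[train])  # train-vs-train kernel
        scores.append(clf.score(K[np.ix_(test, train)], y[test]))  # test-vs-train
    scores = np.asarray(scores)
    # Mean accuracy and an approximate 95% confidence half-width.
    return float(scores.mean()), float(1.96 * scores.std() / np.sqrt(len(scores)))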

if __name__ == '__main__':
    xp_accuracy_diff_entropy()

A Python package for graph kernels, graph edit distances and the graph pre-image problem.