You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

edit_costs.max_num_sols.N.bipartite.py 4.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Wed Oct 20 11:48:02 2020
  5. @author: ljia
  6. """
  7. # This script tests the influence of the ratios between node costs and edge costs on the stability of the GED computation, where the base edit costs are [1, 1, 1, 1, 1, 1].
  8. import os
  9. import multiprocessing
  10. import pickle
  11. import logging
  12. from gklearn.ged.util import compute_geds
  13. import time
  14. import sys
  15. from group_results import group_trials
  16. def generate_graphs():
  17. from gklearn.utils.graph_synthesizer import GraphSynthesizer
  18. gsyzer = GraphSynthesizer()
  19. graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
  20. return graphs
  21. def xp_compute_ged_matrix(graphs, N, max_num_solutions, ratio, trial):
  22. save_file_suffix = '.' + str(N) + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)
  23. # Return if the file exists.
  24. if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
  25. return None, None
  26. """**2. Set parameters.**"""
  27. # Parameters for GED computation.
  28. ged_options = {'method': 'BIPARTITE', # use BIPARTITE huristic.
  29. # 'initialization_method': 'RANDOM', # or 'NODE', etc. (for GEDEnv)
  30. 'lsape_model': 'ECBP', #
  31. # ??when bigger than 1, then the method is considered mIPFP.
  32. # the actual number of computed solutions might be smaller than the specified value
  33. 'max_num_solutions': max_num_solutions,
  34. 'edit_cost': 'CONSTANT', # use CONSTANT cost.
  35. 'greedy_method': 'BASIC', #
  36. # the distance between non-symbolic node/edge labels is computed by euclidean distance.
  37. 'attr_distance': 'euclidean',
  38. 'optimal': True, # if TRUE, the option --greedy-method has no effect
  39. # parallel threads. Do not work if mpg_options['parallel'] = False.
  40. 'threads': multiprocessing.cpu_count(),
  41. 'centrality_method': 'NONE',
  42. 'centrality_weight': 0.7,
  43. 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
  44. }
  45. edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
  46. # edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
  47. # pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))
  48. options = ged_options.copy()
  49. options['edit_cost_constants'] = edit_cost_constants
  50. options['node_labels'] = []
  51. options['edge_labels'] = []
  52. options['node_attrs'] = []
  53. options['edge_attrs'] = []
  54. parallel = True # if num_solutions == 1 else False
  55. """**5. Compute GED matrix.**"""
  56. ged_mat = 'error'
  57. runtime = 0
  58. try:
  59. time0 = time.time()
  60. ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=1, parallel=parallel, verbose=True)
  61. runtime = time.time() - time0
  62. except Exception as exp:
  63. print('An exception occured when running this experiment:')
  64. LOG_FILENAME = save_dir + 'error.txt'
  65. logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
  66. logging.exception(save_file_suffix)
  67. print(repr(exp))
  68. """**6. Get results.**"""
  69. with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
  70. pickle.dump(ged_mat, f)
  71. with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
  72. pickle.dump(runtime, f)
  73. return ged_mat, runtime
  74. def save_trials_as_group(graphs, N, max_num_solutions, ratio):
  75. # Return if the group file exists.
  76. name_middle = '.' + str(N) + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
  77. name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
  78. if os.path.isfile(name_group):
  79. return
  80. ged_mats = []
  81. runtimes = []
  82. for trial in range(1, 101):
  83. print()
  84. print('Trial:', trial)
  85. ged_mat, runtime = xp_compute_ged_matrix(graphs, N, max_num_solutions, ratio, trial)
  86. ged_mats.append(ged_mat)
  87. runtimes.append(runtime)
  88. # Group trials and Remove single files.
  89. name_prefix = 'ged_matrix' + name_middle
  90. group_trials(save_dir, name_prefix, True, True, False)
  91. name_prefix = 'runtime' + name_middle
  92. group_trials(save_dir, name_prefix, True, True, False)
  93. def results_for_a_ratio(ratio):
  94. for N in N_list:
  95. print()
  96. print('# of graphs:', N)
  97. for max_num_solutions in [1, 20, 40, 60, 80, 100]:
  98. print()
  99. print('Max # of solutions:', max_num_solutions)
  100. save_trials_as_group(graphs[:N], N, max_num_solutions, ratio)
  101. if __name__ == '__main__':
  102. if len(sys.argv) > 1:
  103. N_list = [int(i) for i in sys.argv[1:]]
  104. else:
  105. N_list = [10, 50, 100]
  106. # Generate graphs.
  107. graphs = generate_graphs()
  108. save_dir = 'outputs/edit_costs.max_num_sols.N.bipartite/'
  109. os.makedirs(save_dir, exist_ok=True)
  110. os.makedirs(save_dir + 'groups/', exist_ok=True)
  111. for ratio in [10, 1, 0.1]:
  112. print()
  113. print('Ratio:', ratio)
  114. results_for_a_ratio(ratio)

A Python package for graph kernels, graph edit distances and graph pre-image problem.