edit_costs.repeats.ratios.IPFP.py 4.2 kB

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 20 17:48:02 2020

@author: ljia
"""
# This script tests the influence of the ratios between node costs and edge
# costs on the stability of the GED computation, where the base edit costs are
# [1, 1, 1, 1, 1, 1]. The minimum solutions over the given numbers of repeats
# are computed.

import os
import multiprocessing
import pickle
import logging
from gklearn.ged.util import compute_geds
import numpy as np
import time
from utils import get_dataset
import sys


def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):

    save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) \
        + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

    """**1. Get dataset.**"""
    dataset = get_dataset(ds_name)

    """**2. Set parameters.**"""

    # Parameters for GED computation.
    ged_options = {'method': 'IPFP',  # use the IPFP heuristic.
                   'initialization_method': 'RANDOM',  # or 'NODE', etc.
                   # when bigger than 1, the method is considered mIPFP.
                   'initial_solutions': 1,
                   'edit_cost': 'CONSTANT',  # use CONSTANT cost.
                   # the distance between non-symbolic node/edge labels is computed by euclidean distance.
                   'attr_distance': 'euclidean',
                   'ratio_runs_from_initial_solutions': 1,
                   # parallel threads. Does not work if mpg_options['parallel'] = False.
                   'threads': multiprocessing.cpu_count(),
                   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
                   }

    edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
    # edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
    # pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

    options = ged_options.copy()
    options['edit_cost_constants'] = edit_cost_constants
    options['node_labels'] = dataset.node_labels
    options['edge_labels'] = dataset.edge_labels
    options['node_attrs'] = dataset.node_attrs
    options['edge_attrs'] = dataset.edge_attrs
    parallel = True  # if num_solutions == 1 else False

    """**5. Compute GED matrix.**"""
    ged_mat = 'error'
    runtime = 0
    try:
        time0 = time.time()
        ged_vec_init, ged_mat, n_edit_operations = compute_geds(
            dataset.graphs, options=options, repeats=repeats,
            parallel=parallel, verbose=True)
        runtime = time.time() - time0
    except Exception as exp:
        print('An exception occurred when running this experiment:')
        LOG_FILENAME = save_dir + 'error.txt'
        logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
        logging.exception(save_file_suffix)
        print(repr(exp))

    """**6. Get results.**"""
    with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
        pickle.dump(ged_mat, f)
    with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
        pickle.dump(runtime, f)

    return ged_mat, runtime


def save_trials_as_group(dataset, ds_name, repeats, ratio):
    ged_mats = []
    runtimes = []
    for trial in range(1, 101):
        print()
        print('Trial:', trial)
        ged_mat, runtime = xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial)
        ged_mats.append(ged_mat)
        runtimes.append(runtime)

    # save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio)
    # with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f:
    #     np.save(f, np.array(ged_mats))
    # with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f:
    #     pickle.dump(runtime, f)


def results_for_a_dataset(ds_name):
    """**1. Get dataset.**"""
    dataset = get_dataset(ds_name)

    for repeats in [1, 20, 40, 60, 80, 100]:
        print()
        print('Repeats:', repeats)
        for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]:
            print()
            print('Ratio:', ratio)
            save_trials_as_group(dataset, ds_name, repeats, ratio)


if __name__ == '__main__':
    if len(sys.argv) > 1:
        ds_name_list = sys.argv[1:]
    else:
        ds_name_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']

    save_dir = 'outputs/edit_costs.repeats.ratios.IPFP/'
    os.makedirs(save_dir, exist_ok=True)
    os.makedirs(save_dir + 'groups/', exist_ok=True)

    for ds_name in ds_name_list:
        print()
        print('Dataset:', ds_name)
        results_for_a_dataset(ds_name)
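
The script can be run directly, optionally with one or more dataset names as command-line arguments (otherwise the four default datasets are used). Each trial pickles its GED matrix and runtime into outputs/edit_costs.repeats.ratios.IPFP/ with a suffix built from the dataset name, number of repeats, cost ratio and trial index. Below is a minimal sketch, not part of the script, of how one such result could be loaded back for inspection; the dataset, repeats, ratio and trial values are example choices only, and the file-naming pattern is copied from xp_compute_ged_matrix above.

import pickle

# Example parameters (assumed values); any combination produced by the script works.
save_dir = 'outputs/edit_costs.repeats.ratios.IPFP/'
ds_name, repeats, ratio, trial = 'MAO', 20, 0.5, 1
suffix = ('.' + ds_name + '.repeats_' + str(repeats)
          + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial))

# Load the GED matrix and runtime saved by xp_compute_ged_matrix().
with open(save_dir + 'ged_matrix' + suffix + '.pkl', 'rb') as f:
    ged_mat = pickle.load(f)  # numpy array, or the string 'error' if the trial failed
with open(save_dir + 'runtime' + suffix + '.pkl', 'rb') as f:
    runtime = pickle.load(f)

print(ged_mat if isinstance(ged_mat, str) else ged_mat.shape, runtime)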

A Python package for graph kernels, graph edit distances and the graph pre-image problem.