You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_median_graph_estimator.py 5.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Mon Mar 16 17:26:40 2020
  5. @author: ljia
  6. """
  7. def test_median_graph_estimator():
  8. from gklearn.utils import load_dataset
  9. from gklearn.ged.median import MedianGraphEstimator, constant_node_costs
  10. from gklearn.gedlib import librariesImport, gedlibpy
  11. from gklearn.preimage.utils import get_same_item_indices
  12. import multiprocessing
  13. # estimator parameters.
  14. init_type = 'MEDOID'
  15. num_inits = 1
  16. threads = multiprocessing.cpu_count()
  17. time_limit = 60000
  18. # algorithm parameters.
  19. algo = 'IPFP'
  20. initial_solutions = 1
  21. algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE '
  22. edit_cost_name = 'LETTER2'
  23. edit_cost_constants = [0.02987291, 0.0178211, 0.01431966, 0.001, 0.001]
  24. ds_name = 'Letter_high'
  25. # Load dataset.
  26. # dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt'
  27. dataset = '../../../datasets/Letter-high/Letter-high_A.txt'
  28. Gn, y_all, label_names = load_dataset(dataset)
  29. y_idx = get_same_item_indices(y_all)
  30. for i, (y, values) in enumerate(y_idx.items()):
  31. Gn_i = [Gn[val] for val in values]
  32. break
  33. # Set up the environment.
  34. ged_env = gedlibpy.GEDEnv()
  35. # gedlibpy.restart_env()
  36. ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants)
  37. for G in Gn_i:
  38. ged_env.add_nx_graph(G, '')
  39. graph_ids = ged_env.get_all_graph_ids()
  40. set_median_id = ged_env.add_graph('set_median')
  41. gen_median_id = ged_env.add_graph('gen_median')
  42. ged_env.init(init_option='EAGER_WITHOUT_SHUFFLED_COPIES')
  43. # Set up the estimator.
  44. mge = MedianGraphEstimator(ged_env, constant_node_costs(edit_cost_name))
  45. mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1')
  46. mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type
  47. mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE --randomness PSEUDO --parallel TRUE '# @todo: std::to_string(rng())
  48. # Select the GED algorithm.
  49. algo_options = '--threads ' + str(threads) + algo_options_suffix
  50. mge.set_options(mge_options)
  51. mge.set_label_names(node_labels=label_names['node_labels'],
  52. edge_labels=label_names['edge_labels'],
  53. node_attrs=label_names['node_attrs'],
  54. edge_attrs=label_names['edge_attrs'])
  55. mge.set_init_method(algo, algo_options)
  56. mge.set_descent_method(algo, algo_options)
  57. # Run the estimator.
  58. mge.run(graph_ids, set_median_id, gen_median_id)
  59. # Get SODs.
  60. sod_sm = mge.get_sum_of_distances('initialized')
  61. sod_gm = mge.get_sum_of_distances('converged')
  62. print('sod_sm, sod_gm: ', sod_sm, sod_gm)
  63. # Get median graphs.
  64. set_median = ged_env.get_nx_graph(set_median_id)
  65. gen_median = ged_env.get_nx_graph(gen_median_id)
  66. return set_median, gen_median
  67. def test_median_graph_estimator_symb():
  68. from gklearn.utils import load_dataset
  69. from gklearn.ged.median import MedianGraphEstimator, constant_node_costs
  70. from gklearn.gedlib import librariesImport, gedlibpy
  71. from gklearn.preimage.utils import get_same_item_indices
  72. import multiprocessing
  73. # estimator parameters.
  74. init_type = 'MEDOID'
  75. num_inits = 1
  76. threads = multiprocessing.cpu_count()
  77. time_limit = 60000
  78. # algorithm parameters.
  79. algo = 'IPFP'
  80. initial_solutions = 1
  81. algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE '
  82. edit_cost_name = 'CONSTANT'
  83. edit_cost_constants = [4, 4, 2, 1, 1, 1]
  84. ds_name = 'MUTAG'
  85. # Load dataset.
  86. dataset = '../../../datasets/MUTAG/MUTAG_A.txt'
  87. Gn, y_all, label_names = load_dataset(dataset)
  88. y_idx = get_same_item_indices(y_all)
  89. for i, (y, values) in enumerate(y_idx.items()):
  90. Gn_i = [Gn[val] for val in values]
  91. break
  92. Gn_i = Gn_i[0:10]
  93. # Set up the environment.
  94. ged_env = gedlibpy.GEDEnv()
  95. # gedlibpy.restart_env()
  96. ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants)
  97. for G in Gn_i:
  98. ged_env.add_nx_graph(G, '')
  99. graph_ids = ged_env.get_all_graph_ids()
  100. set_median_id = ged_env.add_graph('set_median')
  101. gen_median_id = ged_env.add_graph('gen_median')
  102. ged_env.init(init_option='EAGER_WITHOUT_SHUFFLED_COPIES')
  103. # Set up the estimator.
  104. mge = MedianGraphEstimator(ged_env, constant_node_costs(edit_cost_name))
  105. mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1')
  106. mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type
  107. mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE --randomness PSEUDO --parallel TRUE '# @todo: std::to_string(rng())
  108. # Select the GED algorithm.
  109. algo_options = '--threads ' + str(threads) + algo_options_suffix
  110. mge.set_options(mge_options)
  111. mge.set_label_names(node_labels=label_names['node_labels'],
  112. edge_labels=label_names['edge_labels'],
  113. node_attrs=label_names['node_attrs'],
  114. edge_attrs=label_names['edge_attrs'])
  115. mge.set_init_method(algo, algo_options)
  116. mge.set_descent_method(algo, algo_options)
  117. # Run the estimator.
  118. mge.run(graph_ids, set_median_id, gen_median_id)
  119. # Get SODs.
  120. sod_sm = mge.get_sum_of_distances('initialized')
  121. sod_gm = mge.get_sum_of_distances('converged')
  122. print('sod_sm, sod_gm: ', sod_sm, sod_gm)
  123. # Get median graphs.
  124. set_median = ged_env.get_nx_graph(set_median_id)
  125. gen_median = ged_env.get_nx_graph(gen_median_id)
  126. return set_median, gen_median
  127. if __name__ == '__main__':
  128. # set_median, gen_median = test_median_graph_estimator()
  129. set_median, gen_median = test_median_graph_estimator_symb()

A Python package for graph kernels, graph edit distances and the graph pre-image problem.