You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

test_ged.py 19 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. #export LD_LIBRARY_PATH=.:/export/home/lambertn/Documents/gedlibpy/lib/fann/:/export/home/lambertn/Documents/gedlibpy/lib/libsvm.3.22:/export/home/lambertn/Documents/gedlibpy/lib/nomad
  2. # So that "import script" can find the libraries that GedLib needs.
  3. # Equivalent to setting the LD_LIBRARY_PATH environment variable in a bash shell.
  4. #import gedlibpy_linlin.librariesImport
  5. #from gedlibpy_linlin import gedlibpy
  6. from libs import *
  7. import networkx as nx
  8. import numpy as np
  9. from tqdm import tqdm
  10. import sys
  11. def test_NON_SYMBOLIC_cost():
  12. """Test edit cost LETTER2.
  13. """
  14. from gklearn.preimage.ged import GED, get_nb_edit_operations_nonsymbolic, get_nb_edit_operations_letter
  15. from gklearn.preimage.test_k_closest_graphs import reform_attributes
  16. from gklearn.utils.graphfiles import loadDataset
  17. dataset = '../../datasets/Letter-high/Letter-high_A.txt'
  18. Gn, y_all = loadDataset(dataset)
  19. g1 = Gn[200]
  20. g2 = Gn[1780]
  21. reform_attributes(g1)
  22. reform_attributes(g2)
  23. c_vi = 0.675
  24. c_vr = 0.675
  25. c_vs = 0.75
  26. c_ei = 0.425
  27. c_er = 0.425
  28. c_es = 0
  29. edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  30. dis, pi_forward, pi_backward = GED(g1, g2, lib='gedlibpy',
  31. cost='NON_SYMBOLIC', method='IPFP', edit_cost_constant=edit_cost_constant,
  32. algo_options='', stabilizer=None)
  33. n_vi, n_vr, sod_vs, n_ei, n_er, sod_es = get_nb_edit_operations_nonsymbolic(g1, g2,
  34. pi_forward, pi_backward)
  35. print('# of operations:', n_vi, n_vr, sod_vs, n_ei, n_er, sod_es)
  36. print('c_vi, c_vr, c_vs, c_ei, c_er:', c_vi, c_vr, c_vs, c_ei, c_er, c_es)
  37. cost_computed = c_vi * n_vi + c_vr * n_vr + c_vs * sod_vs \
  38. + c_ei * n_ei + c_er * n_er + c_es * sod_es
  39. print('dis (cost computed by GED):', dis)
  40. print('cost computed by # of operations and edit cost constants:', cost_computed)
  41. def test_LETTER2_cost():
  42. """Test edit cost LETTER2.
  43. """
  44. from gklearn.preimage.ged import GED, get_nb_edit_operations_letter
  45. from gklearn.preimage.test_k_closest_graphs import reform_attributes
  46. from gklearn.utils.graphfiles import loadDataset
  47. ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
  48. 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
  49. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
  50. g1 = Gn[200]
  51. g2 = Gn[1780]
  52. reform_attributes(g1)
  53. reform_attributes(g2)
  54. c_vi = 0.675
  55. c_vr = 0.675
  56. c_vs = 0.75
  57. c_ei = 0.425
  58. c_er = 0.425
  59. edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er]
  60. dis, pi_forward, pi_backward = GED(g1, g2, dataset='letter', lib='gedlibpy',
  61. cost='LETTER2', method='IPFP', edit_cost_constant=edit_cost_constant,
  62. algo_options='', stabilizer=None)
  63. n_vi, n_vr, n_vs, sod_vs, n_ei, n_er = get_nb_edit_operations_letter(g1, g2,
  64. pi_forward, pi_backward)
  65. print('# of operations:', n_vi, n_vr, n_vs, sod_vs, n_ei, n_er)
  66. print('c_vi, c_vr, c_vs, c_ei, c_er:', c_vi, c_vr, c_vs, c_ei, c_er)
  67. cost_computed = c_vi * n_vi + c_vr * n_vr + c_vs * sod_vs \
  68. + c_ei * n_ei + c_er * n_er
  69. print('dis (cost computed by GED):', dis)
  70. print('cost computed by # of operations and edit cost constants:', cost_computed)
  71. def test_get_nb_edit_operations_letter():
  72. """Test whether function preimage.ged.get_nb_edit_operations_letter returns
  73. correct numbers of edit operations. The distance/cost computed by GED
  74. should be the same as the cost computed by number of operations and edit
  75. cost constants.
  76. """
  77. from gklearn.preimage.ged import GED, get_nb_edit_operations_letter
  78. from gklearn.preimage.test_k_closest_graphs import reform_attributes
  79. from gklearn.utils.graphfiles import loadDataset
  80. ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
  81. 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
  82. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
  83. g1 = Gn[200]
  84. g2 = Gn[1780]
  85. reform_attributes(g1)
  86. reform_attributes(g2)
  87. c_vir = 0.9
  88. c_eir = 1.7
  89. alpha = 0.75
  90. edit_cost_constant = [c_vir, c_eir, alpha]
  91. dis, pi_forward, pi_backward = GED(g1, g2, dataset='letter', lib='gedlibpy',
  92. cost='LETTER', method='IPFP', edit_cost_constant=edit_cost_constant,
  93. algo_options='', stabilizer=None)
  94. n_vi, n_vr, n_vs, c_vs, n_ei, n_er = get_nb_edit_operations_letter(g1, g2,
  95. pi_forward, pi_backward)
  96. print('# of operations and costs:', n_vi, n_vr, n_vs, c_vs, n_ei, n_er)
  97. print('c_vir, c_eir, alpha:', c_vir, c_eir, alpha)
  98. cost_computed = alpha * c_vir * (n_vi + n_vr) \
  99. + alpha * c_vs \
  100. + (1 - alpha) * c_eir * (n_ei + n_er)
  101. print('dis (cost computed by GED):', dis)
  102. print('cost computed by # of operations and edit cost constants:', cost_computed)
  103. def test_get_nb_edit_operations():
  104. """Test whether function preimage.ged.get_nb_edit_operations returns correct
  105. numbers of edit operations. The distance/cost computed by GED should be the
  106. same as the cost computed by number of operations and edit cost constants.
  107. """
  108. from gklearn.preimage.ged import GED, get_nb_edit_operations
  109. from gklearn.utils.graphfiles import loadDataset
  110. import os
  111. ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds',
  112. 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'} # node/edge symb
  113. Gn, y_all = loadDataset(ds['dataset'])
  114. g1 = Gn[20]
  115. g2 = Gn[108]
  116. c_vi = 3
  117. c_vr = 3
  118. c_vs = 1
  119. c_ei = 3
  120. c_er = 3
  121. c_es = 1
  122. edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  123. dis, pi_forward, pi_backward = GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy',
  124. cost='CONSTANT', method='IPFP', edit_cost_constant=edit_cost_constant,
  125. algo_options='', stabilizer=None)
  126. n_vi, n_vr, n_vs, n_ei, n_er, n_es = get_nb_edit_operations(g1, g2,
  127. pi_forward, pi_backward)
  128. print('# of operations and costs:', n_vi, n_vr, n_vs, n_ei, n_er, n_es)
  129. print('edit costs:', c_vi, c_vr, c_vs, c_ei, c_er, c_es)
  130. cost_computed = n_vi * c_vi + n_vr * c_vr + n_vs * c_vs \
  131. + n_ei * c_ei + n_er * c_er + n_es * c_es
  132. print('dis (cost computed by GED):', dis)
  133. print('cost computed by # of operations and edit cost constants:', cost_computed)
  134. def test_ged_python_bash_cpp():
  135. """Test ged computation with python invoking the c++ code by bash command (with updated library).
  136. """
  137. from gklearn.utils.graphfiles import loadDataset
  138. from gklearn.preimage.ged import GED
  139. data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
  140. # collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
  141. collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml'
  142. graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'
  143. Gn, y = loadDataset(collection_file, extra_params=graph_dir)
  144. algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
  145. for repeat in range(0, 3):
  146. # Generate the result file.
  147. ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_bash_' + str(repeat) + '_init40.3_20.txt'
  148. # runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_min_' + str(repeat) + '.txt'
  149. ged_file = open(ged_filename, 'a')
  150. # runtime_file = open(runtime_filename, 'a')
  151. ged_mat = np.empty((len(Gn), len(Gn)))
  152. # runtime_mat = np.empty((len(Gn), len(Gn)))
  153. for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
  154. for j in range(len(Gn)):
  155. print(i, j)
  156. g1 = Gn[i]
  157. g2 = Gn[j]
  158. upper_bound, _, _ = GED(g1, g2, lib='gedlib-bash', cost='CONSTANT',
  159. method='IPFP',
  160. edit_cost_constant=[3.0, 3.0, 1.0, 3.0, 3.0, 1.0],
  161. algo_options=algo_options)
  162. # runtime = gedlibpy.get_runtime(g1, g2)
  163. ged_mat[i][j] = upper_bound
  164. # runtime_mat[i][j] = runtime
  165. # Write to files.
  166. ged_file.write(str(int(upper_bound)) + ' ')
  167. # runtime_file.write(str(runtime) + ' ')
  168. ged_file.write('\n')
  169. # runtime_file.write('\n')
  170. ged_file.close()
  171. # runtime_file.close()
  172. print('ged_mat')
  173. print(ged_mat)
  174. # print('runtime_mat:')
  175. # print(runtime_mat)
  176. return
  177. def test_ged_best_settings_updated():
  178. """Test ged computation with best settings the same as in the C++ code (with updated library).
  179. """
  180. data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
  181. collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
  182. # collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml'
  183. graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'
  184. algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
  185. for repeat in range(0, 3):
  186. # Generate the result file.
  187. ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_updated_' + str(repeat) + '_init40.txt'
  188. runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_updated_' + str(repeat) + '_init40.txt'
  189. gedlibpy.restart_env()
  190. gedlibpy.load_GXL_graphs(graph_dir, collection_file)
  191. listID = gedlibpy.get_all_graph_ids()
  192. gedlibpy.set_edit_cost('CONSTANT', [3.0, 3.0, 1.0, 3.0, 3.0, 1.0])
  193. gedlibpy.init()
  194. gedlibpy.set_method("IPFP", algo_options)
  195. gedlibpy.init_method()
  196. ged_mat = np.empty((len(listID), len(listID)))
  197. runtime_mat = np.empty((len(listID), len(listID)))
  198. for i in tqdm(range(len(listID)), desc='computing GEDs', file=sys.stdout):
  199. ged_file = open(ged_filename, 'a')
  200. runtime_file = open(runtime_filename, 'a')
  201. for j in range(len(listID)):
  202. g1 = listID[i]
  203. g2 = listID[j]
  204. gedlibpy.run_method(g1, g2)
  205. upper_bound = gedlibpy.get_upper_bound(g1, g2)
  206. runtime = gedlibpy.get_runtime(g1, g2)
  207. ged_mat[i][j] = upper_bound
  208. runtime_mat[i][j] = runtime
  209. # Write to files.
  210. ged_file.write(str(int(upper_bound)) + ' ')
  211. runtime_file.write(str(runtime) + ' ')
  212. ged_file.write('\n')
  213. runtime_file.write('\n')
  214. ged_file.close()
  215. runtime_file.close()
  216. print('ged_mat')
  217. print(ged_mat)
  218. print('runtime_mat:')
  219. print(runtime_mat)
  220. return
  221. def test_ged_best_settings():
  222. """Test ged computation with best settings the same as in the C++ code.
  223. """
  224. data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
  225. collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
  226. graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'
  227. algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'
  228. for repeat in range(0, 3):
  229. # Generate the result file.
  230. ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_best_settings_' + str(repeat) + '.txt'
  231. runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_best_settings_' + str(repeat) + '.txt'
  232. ged_file = open(ged_filename, 'a')
  233. runtime_file = open(runtime_filename, 'a')
  234. gedlibpy.restart_env()
  235. gedlibpy.load_GXL_graphs(graph_dir, collection_file)
  236. listID = gedlibpy.get_all_graph_ids()
  237. gedlibpy.set_edit_cost('CONSTANT', [3.0, 3.0, 1.0, 3.0, 3.0, 1.0])
  238. gedlibpy.init()
  239. gedlibpy.set_method("IPFP", algo_options)
  240. gedlibpy.init_method()
  241. ged_mat = np.empty((len(listID), len(listID)))
  242. runtime_mat = np.empty((len(listID), len(listID)))
  243. for i in tqdm(range(len(listID)), desc='computing GEDs', file=sys.stdout):
  244. for j in range(len(listID)):
  245. g1 = listID[i]
  246. g2 = listID[j]
  247. gedlibpy.run_method(g1, g2)
  248. upper_bound = gedlibpy.get_upper_bound(g1, g2)
  249. runtime = gedlibpy.get_runtime(g1, g2)
  250. ged_mat[i][j] = upper_bound
  251. runtime_mat[i][j] = runtime
  252. # Write to files.
  253. ged_file.write(str(int(upper_bound)) + ' ')
  254. runtime_file.write(str(runtime) + ' ')
  255. ged_file.write('\n')
  256. runtime_file.write('\n')
  257. ged_file.close()
  258. runtime_file.close()
  259. print('ged_mat')
  260. print(ged_mat)
  261. print('runtime_mat:')
  262. print(runtime_mat)
  263. return
  264. def test_ged_default():
  265. """Test ged computation with default settings.
  266. """
  267. data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
  268. collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
  269. graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'
  270. for repeat in range(3):
  271. # Generate the result file.
  272. ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_default_' + str(repeat) + '.txt'
  273. runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_default_' + str(repeat) + '.txt'
  274. ged_file = open(ged_filename, 'a')
  275. runtime_file = open(runtime_filename, 'a')
  276. gedlibpy.restart_env()
  277. gedlibpy.load_GXL_graphs(graph_dir, collection_file)
  278. listID = gedlibpy.get_all_graph_ids()
  279. gedlibpy.set_edit_cost('CONSTANT', [3.0, 3.0, 1.0, 3.0, 3.0, 1.0])
  280. gedlibpy.init()
  281. gedlibpy.set_method("IPFP", "")
  282. gedlibpy.init_method()
  283. ged_mat = np.empty((len(listID), len(listID)))
  284. runtime_mat = np.empty((len(listID), len(listID)))
  285. for i in tqdm(range(len(listID)), desc='computing GEDs', file=sys.stdout):
  286. for j in range(len(listID)):
  287. g1 = listID[i]
  288. g2 = listID[j]
  289. gedlibpy.run_method(g1, g2)
  290. upper_bound = gedlibpy.get_upper_bound(g1, g2)
  291. runtime = gedlibpy.get_runtime(g1, g2)
  292. ged_mat[i][j] = upper_bound
  293. runtime_mat[i][j] = runtime
  294. # Write to files.
  295. ged_file.write(str(int(upper_bound)) + ' ')
  296. runtime_file.write(str(runtime) + ' ')
  297. ged_file.write('\n')
  298. runtime_file.write('\n')
  299. ged_file.close()
  300. runtime_file.close()
  301. print('ged_mat')
  302. print(ged_mat)
  303. print('runtime_mat:')
  304. print(runtime_mat)
  305. return
  306. def test_ged_min():
  307. """Test ged computation with the "min" stabilizer.
  308. """
  309. from gklearn.utils.graphfiles import loadDataset
  310. from gklearn.preimage.ged import GED
  311. data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
  312. collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
  313. graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'
  314. Gn, y = loadDataset(collection_file, extra_params=graph_dir)
  315. # algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'
  316. for repeat in range(0, 3):
  317. # Generate the result file.
  318. ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_min_' + str(repeat) + '.txt'
  319. # runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_min_' + str(repeat) + '.txt'
  320. ged_file = open(ged_filename, 'a')
  321. # runtime_file = open(runtime_filename, 'a')
  322. ged_mat = np.empty((len(Gn), len(Gn)))
  323. # runtime_mat = np.empty((len(Gn), len(Gn)))
  324. for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
  325. for j in range(len(Gn)):
  326. g1 = Gn[i]
  327. g2 = Gn[j]
  328. upper_bound, _, _ = GED(g1, g2, lib='gedlibpy', cost='CONSTANT',
  329. method='IPFP',
  330. edit_cost_constant=[3.0, 3.0, 1.0, 3.0, 3.0, 1.0],
  331. stabilizer='min', repeat=10)
  332. # runtime = gedlibpy.get_runtime(g1, g2)
  333. ged_mat[i][j] = upper_bound
  334. # runtime_mat[i][j] = runtime
  335. # Write to files.
  336. ged_file.write(str(int(upper_bound)) + ' ')
  337. # runtime_file.write(str(runtime) + ' ')
  338. ged_file.write('\n')
  339. # runtime_file.write('\n')
  340. ged_file.close()
  341. # runtime_file.close()
  342. print('ged_mat')
  343. print(ged_mat)
  344. # print('runtime_mat:')
  345. # print(runtime_mat)
  346. return
  347. def init() :
  348. print("List of Edit Cost Options : ")
  349. for i in gedlibpy.list_of_edit_cost_options :
  350. print (i)
  351. print("")
  352. print("List of Method Options : ")
  353. for j in gedlibpy.list_of_method_options :
  354. print (j)
  355. print("")
  356. print("List of Init Options : ")
  357. for k in gedlibpy.list_of_init_options :
  358. print (k)
  359. print("")
  360. def convertGraph(G):
  361. G_new = nx.Graph()
  362. for nd, attrs in G.nodes(data=True):
  363. G_new.add_node(str(nd), chem=attrs['atom'])
  364. for nd1, nd2, attrs in G.edges(data=True):
  365. G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
  366. return G_new
  367. def testNxGrapĥ():
  368. from gklearn.utils.graphfiles import loadDataset
  369. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  370. 'extra_params': {}} # node/edge symb
  371. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  372. gedlibpy.restart_env()
  373. for graph in Gn:
  374. g_new = convertGraph(graph)
  375. gedlibpy.add_nx_graph(g_new, "")
  376. listID = gedlibpy.get_all_graph_ids()
  377. gedlibpy.set_edit_cost("CHEM_1")
  378. gedlibpy.init()
  379. gedlibpy.set_method("IPFP", "")
  380. gedlibpy.init_method()
  381. print(listID)
  382. g = listID[0]
  383. h = listID[1]
  384. gedlibpy.run_method(g, h)
  385. print("Node Map : ", gedlibpy.get_node_map(g, h))
  386. print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h))
  387. print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h)))
if __name__ == '__main__':
    # Script entry point: exactly one experiment is enabled at a time.
    # Uncomment the call you want to run and comment out the others.
#    test_ged_default()
#    test_ged_min()
#    test_ged_best_settings()
#    test_ged_best_settings_updated()
#    test_ged_python_bash_cpp()
#    test_get_nb_edit_operations()
#    test_get_nb_edit_operations_letter()
#    test_LETTER2_cost()
    test_NON_SYMBOLIC_cost()
    #init()
    #testNxGrapĥ()

A Python package for graph kernels, graph edit distances and the graph pre-image problem.