You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

xp_fit_method.py 44 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Jan 14 15:39:29 2020
  5. @author: ljia
  6. """
  7. import numpy as np
  8. import random
  9. import csv
  10. from shutil import copyfile
  11. import networkx as nx
  12. import matplotlib.pyplot as plt
  13. import os
  14. import time
  15. from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
  16. from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
  17. from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix, compute_kernel
  18. from gklearn.preimage.find_best_k import getRelations
  19. def get_dataset(ds_name):
  20. if ds_name == 'Letter-high': # node non-symb
  21. dataset = 'cpp_ext/data/collections/Letter.xml'
  22. graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'
  23. Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
  24. for G in Gn:
  25. reform_attributes(G, na_names=['x', 'y'])
  26. G.graph['node_labels'] = []
  27. G.graph['edge_labels'] = []
  28. G.graph['node_attrs'] = ['x', 'y']
  29. G.graph['edge_attrs'] = []
  30. elif ds_name == 'Letter-med': # node non-symb
  31. dataset = 'cpp_ext/data/collections/Letter.xml'
  32. graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/MED/'
  33. Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
  34. for G in Gn:
  35. reform_attributes(G, na_names=['x', 'y'])
  36. G.graph['node_labels'] = []
  37. G.graph['edge_labels'] = []
  38. G.graph['node_attrs'] = ['x', 'y']
  39. G.graph['edge_attrs'] = []
  40. elif ds_name == 'Letter-low': # node non-symb
  41. dataset = 'cpp_ext/data/collections/Letter.xml'
  42. graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/LOW/'
  43. Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
  44. for G in Gn:
  45. reform_attributes(G, na_names=['x', 'y'])
  46. G.graph['node_labels'] = []
  47. G.graph['edge_labels'] = []
  48. G.graph['node_attrs'] = ['x', 'y']
  49. G.graph['edge_attrs'] = []
  50. elif ds_name == 'Fingerprint':
  51. # dataset = 'cpp_ext/data/collections/Fingerprint.xml'
  52. # graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/'
  53. # Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
  54. # for G in Gn:
  55. # reform_attributes(G)
  56. dataset = '../../datasets/Fingerprint/Fingerprint_A.txt'
  57. graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/'
  58. Gn, y_all = loadDataset(dataset)
  59. elif ds_name == 'SYNTHETIC':
  60. pass
  61. elif ds_name == 'SYNTHETICnew':
  62. dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
  63. graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/SYNTHETICnew'
  64. # dataset = '../../datasets/Letter-high/Letter-high_A.txt'
  65. # graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'
  66. Gn, y_all = loadDataset(dataset)
  67. elif ds_name == 'Synthie':
  68. pass
  69. elif ds_name == 'COIL-DEL':
  70. dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt'
  71. graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/COIL-DEL/'
  72. Gn, y_all = loadDataset(dataset)
  73. elif ds_name == 'COIL-RAG':
  74. pass
  75. elif ds_name == 'COLORS-3':
  76. pass
  77. elif ds_name == 'FRANKENSTEIN':
  78. pass
  79. return Gn, y_all, graph_dir
  80. def init_output_file(ds_name, gkernel, fit_method, dir_output):
  81. # fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
  82. fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
  83. f_detail = open(dir_output + fn_output_detail, 'a')
  84. csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'edit cost',
  85. 'GED method', 'attr distance', 'fit method', 'k',
  86. 'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
  87. 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
  88. 'dis_k gi -> GM', 'fitting time', 'generating time', 'total time',
  89. 'median set'])
  90. f_detail.close()
  91. # fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
  92. fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv'
  93. f_summary = open(dir_output + fn_output_summary, 'a')
  94. csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'edit cost',
  95. 'GED method', 'attr distance', 'fit method', 'k',
  96. 'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
  97. 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
  98. 'dis_k gi -> GM', 'fitting time', 'generating time', 'total time',
  99. '# SOD SM -> GM', '# dis_k SM -> GM',
  100. '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
  101. 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
  102. 'repeats better dis_k gi -> GM'])
  103. f_summary.close()
  104. return fn_output_detail, fn_output_summary
  105. def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_solutions=1,
  106. Gn_data=None, k_dis_data=None, Kmatrix=None,
  107. is_separate=False):
  108. # 1. set parameters.
  109. print('1. setting parameters...')
  110. ds_name = parameters['ds_name']
  111. gkernel = parameters['gkernel']
  112. edit_cost_name = parameters['edit_cost_name']
  113. ged_method = parameters['ged_method']
  114. attr_distance = parameters['attr_distance']
  115. fit_method = parameters['fit_method']
  116. init_ecc = parameters['init_ecc']
  117. node_label = None
  118. edge_label = None
  119. dir_output = 'results/xp_fit_method/'
  120. # 2. get dataset.
  121. print('2. getting dataset...')
  122. if Gn_data is None:
  123. Gn, y_all, graph_dir = get_dataset(ds_name)
  124. else:
  125. Gn = Gn_data[0]
  126. y_all = Gn_data[1]
  127. graph_dir = Gn_data[2]
  128. # 3. compute kernel distance matrix.
  129. print('3. computing kernel distance matrix...')
  130. if k_dis_data is None:
  131. dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None,
  132. None, Kmatrix=Kmatrix, gkernel=gkernel)
  133. else:
  134. # dis_mat = k_dis_data[0]
  135. # dis_max = k_dis_data[1]
  136. # dis_min = k_dis_data[2]
  137. # dis_mean = k_dis_data[3]
  138. # print('pair distances - dis_max, dis_min, dis_mean:', dis_max, dis_min, dis_mean)
  139. pass
  140. if save_results:
  141. # create result files.
  142. print('creating output files...')
  143. fn_output_detail, fn_output_summary = init_output_file(ds_name, gkernel,
  144. fit_method, dir_output)
  145. # start repeats.
  146. repeats = 1
  147. # k_list = range(2, 11)
  148. k_list = [0]
  149. # get indices by classes.
  150. y_idx = get_same_item_indices(y_all)
  151. random.seed(1)
  152. rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
  153. for k in k_list:
  154. # print('\n--------- k =', k, '----------')
  155. sod_sm_mean_list = []
  156. sod_gm_mean_list = []
  157. dis_k_sm_mean_list = []
  158. dis_k_gm_mean_list = []
  159. dis_k_gi_min_mean_list = []
  160. time_fitting_mean_list = []
  161. time_generating_mean_list = []
  162. time_total_mean_list = []
  163. # 3. start generating and computing over targets.
  164. print('4. starting generating and computing over targets......')
  165. for i, (y, values) in enumerate(y_idx.items()):
  166. # y = 'I'
  167. # values = y_idx[y]
  168. # values = values[0:10]
  169. print('\ny =', y)
  170. # if y.strip() == 'A':
  171. # continue
  172. k = len(values)
  173. print('\n--------- k =', k, '----------')
  174. if k < 2:
  175. print('\nk = ', k, ', skip.\n')
  176. continue
  177. sod_sm_list = []
  178. sod_gm_list = []
  179. dis_k_sm_list = []
  180. dis_k_gm_list = []
  181. dis_k_gi_min_list = []
  182. time_fitting_list = []
  183. time_generating_list = []
  184. time_total_list = []
  185. nb_sod_sm2gm = [0, 0, 0]
  186. nb_dis_k_sm2gm = [0, 0, 0]
  187. nb_dis_k_gi2sm = [0, 0, 0]
  188. nb_dis_k_gi2gm = [0, 0, 0]
  189. repeats_better_sod_sm2gm = []
  190. repeats_better_dis_k_sm2gm = []
  191. repeats_better_dis_k_gi2sm = []
  192. repeats_better_dis_k_gi2gm = []
  193. # get Gram matrix for this part of data.
  194. if Kmatrix is not None:
  195. if is_separate:
  196. Kmatrix_sub = Kmatrix[i].copy()
  197. else:
  198. Kmatrix_sub = Kmatrix[values,:]
  199. Kmatrix_sub = Kmatrix_sub[:,values]
  200. else:
  201. Kmatrix_sub = None
  202. for repeat in range(repeats):
  203. print('\nrepeat =', repeat)
  204. random.seed(rdn_seed_list[repeat])
  205. median_set_idx_idx = random.sample(range(0, len(values)), k)
  206. median_set_idx = [values[idx] for idx in median_set_idx_idx]
  207. print('median set: ', median_set_idx)
  208. Gn_median = [Gn[g] for g in values]
  209. # from notebooks.utils.plot_all_graphs import draw_Fingerprint_graph
  210. # for Gn in Gn_median:
  211. # draw_Fingerprint_graph(Gn, save=None)
  212. # GENERATING & COMPUTING!!
  213. res_sods, res_dis_ks, res_times = median_on_k_closest_graphs(Gn_median,
  214. node_label, edge_label,
  215. gkernel, k, fit_method=fit_method, graph_dir=graph_dir,
  216. edit_cost_constants=None, group_min=median_set_idx_idx,
  217. dataset=ds_name, initial_solutions=initial_solutions,
  218. edit_cost_name=edit_cost_name, init_ecc=init_ecc,
  219. Kmatrix=Kmatrix_sub, parallel=False)
  220. sod_sm = res_sods[0]
  221. sod_gm = res_sods[1]
  222. dis_k_sm = res_dis_ks[0]
  223. dis_k_gm = res_dis_ks[1]
  224. dis_k_gi = res_dis_ks[2]
  225. dis_k_gi_min = res_dis_ks[3]
  226. idx_dis_k_gi_min = res_dis_ks[4]
  227. time_fitting = res_times[0]
  228. time_generating = res_times[1]
  229. # write result detail.
  230. sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
  231. dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
  232. dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
  233. dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
  234. if save_results:
  235. f_detail = open(dir_output + fn_output_detail, 'a')
  236. csv.writer(f_detail).writerow([ds_name, gkernel,
  237. edit_cost_name, ged_method, attr_distance,
  238. fit_method, k, y, repeat,
  239. sod_sm, sod_gm, dis_k_sm, dis_k_gm,
  240. dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
  241. dis_k_gi2gm, time_fitting, time_generating,
  242. time_fitting + time_generating, median_set_idx])
  243. f_detail.close()
  244. # compute result summary.
  245. sod_sm_list.append(sod_sm)
  246. sod_gm_list.append(sod_gm)
  247. dis_k_sm_list.append(dis_k_sm)
  248. dis_k_gm_list.append(dis_k_gm)
  249. dis_k_gi_min_list.append(dis_k_gi_min)
  250. time_fitting_list.append(time_fitting)
  251. time_generating_list.append(time_generating)
  252. time_total_list.append(time_fitting + time_generating)
  253. # # SOD SM -> GM
  254. if sod_sm > sod_gm:
  255. nb_sod_sm2gm[0] += 1
  256. repeats_better_sod_sm2gm.append(repeat)
  257. elif sod_sm == sod_gm:
  258. nb_sod_sm2gm[1] += 1
  259. elif sod_sm < sod_gm:
  260. nb_sod_sm2gm[2] += 1
  261. # # dis_k SM -> GM
  262. if dis_k_sm > dis_k_gm:
  263. nb_dis_k_sm2gm[0] += 1
  264. repeats_better_dis_k_sm2gm.append(repeat)
  265. elif dis_k_sm == dis_k_gm:
  266. nb_dis_k_sm2gm[1] += 1
  267. elif dis_k_sm < dis_k_gm:
  268. nb_dis_k_sm2gm[2] += 1
  269. # # dis_k gi -> SM
  270. if dis_k_gi_min > dis_k_sm:
  271. nb_dis_k_gi2sm[0] += 1
  272. repeats_better_dis_k_gi2sm.append(repeat)
  273. elif dis_k_gi_min == dis_k_sm:
  274. nb_dis_k_gi2sm[1] += 1
  275. elif dis_k_gi_min < dis_k_sm:
  276. nb_dis_k_gi2sm[2] += 1
  277. # # dis_k gi -> GM
  278. if dis_k_gi_min > dis_k_gm:
  279. nb_dis_k_gi2gm[0] += 1
  280. repeats_better_dis_k_gi2gm.append(repeat)
  281. elif dis_k_gi_min == dis_k_gm:
  282. nb_dis_k_gi2gm[1] += 1
  283. elif dis_k_gi_min < dis_k_gm:
  284. nb_dis_k_gi2gm[2] += 1
  285. # save median graphs.
  286. fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
  287. fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
  288. + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
  289. copyfile(fname_sm, fn_pre_sm_new + '.gxl')
  290. fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
  291. fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
  292. + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
  293. copyfile(fname_gm, fn_pre_gm_new + '.gxl')
  294. G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
  295. # reform_attributes(G_best_kernel)
  296. fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
  297. + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
  298. saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='default')
  299. # plot median graphs.
  300. if ds_name == 'Letter-high' or ds_name == 'Letter-med' or ds_name == 'Letter-low':
  301. set_median = loadGXL(fn_pre_sm_new + '.gxl')
  302. gen_median = loadGXL(fn_pre_gm_new + '.gxl')
  303. draw_Letter_graph(set_median, fn_pre_sm_new)
  304. draw_Letter_graph(gen_median, fn_pre_gm_new)
  305. draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)
  306. # write result summary for each letter.
  307. sod_sm_mean_list.append(np.mean(sod_sm_list))
  308. sod_gm_mean_list.append(np.mean(sod_gm_list))
  309. dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
  310. dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
  311. dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
  312. time_fitting_mean_list.append(np.mean(time_fitting_list))
  313. time_generating_mean_list.append(np.mean(time_generating_list))
  314. time_total_mean_list.append(np.mean(time_total_list))
  315. sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
  316. dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
  317. dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
  318. dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
  319. if save_results:
  320. f_summary = open(dir_output + fn_output_summary, 'a')
  321. csv.writer(f_summary).writerow([ds_name, gkernel,
  322. edit_cost_name, ged_method, attr_distance,
  323. fit_method, k, y,
  324. sod_sm_mean_list[-1], sod_gm_mean_list[-1],
  325. dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
  326. dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
  327. dis_k_gi2sm_mean, dis_k_gi2gm_mean,
  328. time_fitting_mean_list[-1], time_generating_mean_list[-1],
  329. time_total_mean_list[-1], nb_sod_sm2gm,
  330. nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
  331. repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
  332. repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
  333. f_summary.close()
  334. # write result summary for each letter.
  335. sod_sm_mean = np.mean(sod_sm_mean_list)
  336. sod_gm_mean = np.mean(sod_gm_mean_list)
  337. dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
  338. dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
  339. dis_k_gi_min_mean = np.mean(dis_k_gi_min_list)
  340. time_fitting_mean = np.mean(time_fitting_list)
  341. time_generating_mean = np.mean(time_generating_list)
  342. time_total_mean = np.mean(time_total_list)
  343. sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
  344. dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
  345. dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
  346. dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
  347. if save_results:
  348. f_summary = open(dir_output + fn_output_summary, 'a')
  349. csv.writer(f_summary).writerow([ds_name, gkernel,
  350. edit_cost_name, ged_method, attr_distance,
  351. fit_method, k, 'all',
  352. sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
  353. dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
  354. dis_k_gi2sm_mean, dis_k_gi2gm_mean,
  355. time_fitting_mean, time_generating_mean, time_total_mean])
  356. f_summary.close()
  357. print('\ncomplete.')
  358. #Dessin median courrant
  359. def draw_Letter_graph(graph, file_prefix):
  360. plt.figure()
  361. pos = {}
  362. for n in graph.nodes:
  363. pos[n] = np.array([float(graph.node[n]['x']),float(graph.node[n]['y'])])
  364. nx.draw_networkx(graph, pos)
  365. plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
  366. # plt.show()
  367. plt.clf()
  368. def compute_gm_for_each_class(Gn, y_all, gkernel, parallel='imap_unordered', is_separate=True):
  369. if is_separate:
  370. print('the Gram matrix is computed for each class.')
  371. y_idx = get_same_item_indices(y_all)
  372. Kmatrix = []
  373. run_time = []
  374. k_dis_data = []
  375. for i, (y, values) in enumerate(y_idx.items()):
  376. print('The ', str(i), ' class:')
  377. Gn_i = [Gn[val] for val in values]
  378. time0 = time.time()
  379. Kmatrix.append(compute_kernel(Gn_i, gkernel, None, None, True, parallel=parallel))
  380. run_time.append(time.time() - time0)
  381. k_dis_data.append(kernel_distance_matrix(Gn_i, None, None,
  382. Kmatrix=Kmatrix[i], gkernel=gkernel, verbose=True))
  383. np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
  384. Kmatrix=Kmatrix, run_time=run_time, is_separate=is_separate)
  385. dis_max = np.max([item[1] for item in k_dis_data])
  386. dis_min = np.min([item[2] for item in k_dis_data])
  387. dis_mean = np.mean([item[3] for item in k_dis_data])
  388. print('pair distances - dis_max, dis_min, dis_mean:', dis_max, dis_min,
  389. dis_mean)
  390. else:
  391. time0 = time.time()
  392. Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel=parallel)
  393. run_time = time.time() - time0
  394. np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
  395. Kmatrix=Kmatrix, run_time=run_time, is_separate=is_separate)
  396. k_dis_data = kernel_distance_matrix(Gn, None, None,
  397. Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
  398. print('the Gram matrix is computed for the whole dataset.')
  399. print('pair distances - dis_max, dis_min, dis_mean:', k_dis_data[1],
  400. k_dis_data[2], k_dis_data[3])
  401. print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
  402. # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean]
  403. return Kmatrix, run_time, k_dis_data
  404. if __name__ == "__main__":
  405. # #### xp 1: Letter-high, spkernel.
  406. # # load dataset.
  407. # print('getting dataset and computing kernel distance matrix first...')
  408. # ds_name = 'Letter-high'
  409. # gkernel = 'spkernel'
  410. # Gn, y_all, graph_dir = get_dataset(ds_name)
  411. # # remove graphs without edges.
  412. # Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
  413. # idx = [G[0] for G in Gn]
  414. # Gn = [G[1] for G in Gn]
  415. # y_all = [y_all[i] for i in idx]
  416. ## Gn = Gn[0:50]
  417. ## y_all = y_all[0:50]
  418. # # compute pair distances.
  419. # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
  420. # Kmatrix=None, gkernel=gkernel, verbose=True)
  421. ## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
  422. # # fitting and computing.
  423. # fit_methods = ['random', 'expert', 'k-graphs']
  424. # for fit_method in fit_methods:
  425. # print('\n-------------------------------------')
  426. # print('fit method:', fit_method)
  427. # parameters = {'ds_name': ds_name,
  428. # 'gkernel': gkernel,
  429. # 'edit_cost_name': 'LETTER2',
  430. # 'ged_method': 'mIPFP',
  431. # 'attr_distance': 'euclidean',
  432. # 'fit_method': fit_method}
  433. # xp_fit_method_for_non_symbolic(parameters, save_results=True,
  434. # initial_solutions=40,
  435. # Gn_data = [Gn, y_all, graph_dir],
  436. # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean])
  437. # #### xp 2: Letter-high, sspkernel.
  438. # # load dataset.
  439. # print('getting dataset and computing kernel distance matrix first...')
  440. # ds_name = 'Letter-high'
  441. # gkernel = 'structuralspkernel'
  442. # Gn, y_all, graph_dir = get_dataset(ds_name)
  443. ## Gn = Gn[0:50]
  444. ## y_all = y_all[0:50]
  445. # # compute pair distances.
  446. # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
  447. # Kmatrix=None, gkernel=gkernel, verbose=True)
  448. ## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
  449. # # fitting and computing.
  450. # fit_methods = ['random', 'expert', 'k-graphs']
  451. # for fit_method in fit_methods:
  452. # print('\n-------------------------------------')
  453. # print('fit method:', fit_method)
  454. # parameters = {'ds_name': ds_name,
  455. # 'gkernel': gkernel,
  456. # 'edit_cost_name': 'LETTER2',
  457. # 'ged_method': 'mIPFP',
  458. # 'attr_distance': 'euclidean',
  459. # 'fit_method': fit_method}
  460. # print('parameters: ', parameters)
  461. # xp_fit_method_for_non_symbolic(parameters, save_results=True,
  462. # initial_solutions=40,
  463. # Gn_data = [Gn, y_all, graph_dir],
  464. # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean])
  465. # #### xp 3: SYNTHETICnew, sspkernel, using NON_SYMBOLIC.
  466. # gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.structuralspkernel.gm.npz')
  467. # Kmatrix = gmfile['Kmatrix']
  468. # run_time = gmfile['run_time']
  469. # # normalization
  470. # Kmatrix_diag = Kmatrix.diagonal().copy()
  471. # for i in range(len(Kmatrix)):
  472. # for j in range(i, len(Kmatrix)):
  473. # Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
  474. # Kmatrix[j][i] = Kmatrix[i][j]
  475. ## np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm',
  476. ## Kmatrix=Kmatrix, run_time=run_time)
  477. # # load dataset.
  478. # print('getting dataset and computing kernel distance matrix first...')
  479. # ds_name = 'SYNTHETICnew'
  480. # gkernel = 'structuralspkernel'
  481. # Gn, y_all, graph_dir = get_dataset(ds_name)
  482. # # remove graphs without nodes and edges.
  483. # Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
  484. # and nx.number_of_edges(G) != 0)]
  485. # idx = [G[0] for G in Gn]
  486. # Gn = [G[1] for G in Gn]
  487. # y_all = [y_all[i] for i in idx]
  488. ## Gn = Gn[0:10]
  489. ## y_all = y_all[0:10]
  490. # for G in Gn:
  491. # G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
  492. # # compute pair distances.
  493. # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
  494. # Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
  495. ## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
  496. # # fitting and computing.
  497. # fit_methods = ['k-graphs', 'random', 'random', 'random']
  498. # for fit_method in fit_methods:
  499. # print('\n-------------------------------------')
  500. # print('fit method:', fit_method)
  501. # parameters = {'ds_name': ds_name,
  502. # 'gkernel': gkernel,
  503. # 'edit_cost_name': 'NON_SYMBOLIC',
  504. # 'ged_method': 'mIPFP',
  505. # 'attr_distance': 'euclidean',
  506. # 'fit_method': fit_method}
  507. # xp_fit_method_for_non_symbolic(parameters, save_results=True,
  508. # initial_solutions=1,
  509. # Gn_data = [Gn, y_all, graph_dir],
  510. # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
  511. # Kmatrix=Kmatrix)
  512. # ### xp 4: SYNTHETICnew, spkernel, using NON_SYMBOLIC.
  513. # gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm.npz')
  514. # Kmatrix = gmfile['Kmatrix']
  515. # # normalization
  516. # Kmatrix_diag = Kmatrix.diagonal().copy()
  517. # for i in range(len(Kmatrix)):
  518. # for j in range(i, len(Kmatrix)):
  519. # Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
  520. # Kmatrix[j][i] = Kmatrix[i][j]
  521. # run_time = 21821.35
  522. # np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm',
  523. # Kmatrix=Kmatrix, run_time=run_time)
  524. #
  525. # # load dataset.
  526. # print('getting dataset and computing kernel distance matrix first...')
  527. # ds_name = 'SYNTHETICnew'
  528. # gkernel = 'spkernel'
  529. # Gn, y_all, graph_dir = get_dataset(ds_name)
  530. ## # remove graphs without nodes and edges.
  531. ## Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_node(G) != 0
  532. ## and nx.number_of_edges(G) != 0)]
  533. ## idx = [G[0] for G in Gn]
  534. ## Gn = [G[1] for G in Gn]
  535. ## y_all = [y_all[i] for i in idx]
  536. ## Gn = Gn[0:5]
  537. ## y_all = y_all[0:5]
  538. # for G in Gn:
  539. # G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
  540. #
  541. # # compute/read Gram matrix and pair distances.
  542. ## Kmatrix = compute_kernel(Gn, gkernel, None, None, True)
  543. ## np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
  544. ## Kmatrix=Kmatrix)
  545. # gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
  546. # Kmatrix = gmfile['Kmatrix']
  547. # run_time = gmfile['run_time']
  548. ## Kmatrix = Kmatrix[[0,1,2,3,4],:]
  549. ## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
  550. # print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
  551. # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
  552. # Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
  553. ## Kmatrix = np.zeros((len(Gn), len(Gn)))
  554. ## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
  555. #
  556. # # fitting and computing.
  557. # fit_methods = ['k-graphs', 'random', 'random', 'random']
  558. # for fit_method in fit_methods:
  559. # print('\n-------------------------------------')
  560. # print('fit method:', fit_method)
  561. # parameters = {'ds_name': ds_name,
  562. # 'gkernel': gkernel,
  563. # 'edit_cost_name': 'NON_SYMBOLIC',
  564. # 'ged_method': 'mIPFP',
  565. # 'attr_distance': 'euclidean',
  566. # 'fit_method': fit_method}
  567. # xp_fit_method_for_non_symbolic(parameters, save_results=True,
  568. # initial_solutions=1,
  569. # Gn_data=[Gn, y_all, graph_dir],
  570. # k_dis_data=[dis_mat, dis_max, dis_min, dis_mean],
  571. # Kmatrix=Kmatrix)
  572. # #### xp 5: Fingerprint, sspkernel, using LETTER2, only node attrs.
  573. # # load dataset.
  574. # print('getting dataset and computing kernel distance matrix first...')
  575. # ds_name = 'Fingerprint'
  576. # gkernel = 'structuralspkernel'
  577. # Gn, y_all, graph_dir = get_dataset(ds_name)
  578. # # remove graphs without nodes and edges.
  579. # Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 0]
  580. ## and nx.number_of_edges(G) != 0)]
  581. # idx = [G[0] for G in Gn]
  582. # Gn = [G[1] for G in Gn]
  583. # y_all = [y_all[i] for i in idx]
  584. # y_idx = get_same_item_indices(y_all)
  585. # # remove unused labels.
  586. # for G in Gn:
  587. # G.graph['edge_attrs'] = []
  588. # for edge in G.edges:
  589. # del G.edges[edge]['attributes']
  590. # del G.edges[edge]['orient']
  591. # del G.edges[edge]['angle']
  592. ## Gn = Gn[805:815]
  593. ## y_all = y_all[805:815]
  594. # for G in Gn:
  595. # G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
  596. #
  597. # # compute/read Gram matrix and pair distances.
  598. ## Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
  599. ## np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
  600. ## Kmatrix=Kmatrix)
  601. # gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
  602. # Kmatrix = gmfile['Kmatrix']
  603. ## run_time = gmfile['run_time']
  604. ## Kmatrix = Kmatrix[[0,1,2,3,4],:]
  605. ## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
  606. ## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
  607. # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
  608. # Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
  609. ## Kmatrix = np.zeros((len(Gn), len(Gn)))
  610. ## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
  611. #
  612. # # fitting and computing.
  613. # fit_methods = ['k-graphs', 'random', 'random', 'random']
  614. # for fit_method in fit_methods:
  615. # print('\n-------------------------------------')
  616. # print('fit method:', fit_method)
  617. # parameters = {'ds_name': ds_name,
  618. # 'gkernel': gkernel,
  619. # 'edit_cost_name': 'LETTER2',
  620. # 'ged_method': 'mIPFP',
  621. # 'attr_distance': 'euclidean',
  622. # 'fit_method': fit_method,
  623. # 'init_ecc': [1,1,1,1,1]} # [0.525, 0.525, 0.001, 0.125, 0.125]}
  624. # xp_fit_method_for_non_symbolic(parameters, save_results=True,
  625. # initial_solutions=40,
  626. # Gn_data = [Gn, y_all, graph_dir],
  627. # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
  628. # Kmatrix=Kmatrix)
  629. # #### xp 6: Letter-med, sspkernel.
  630. # # load dataset.
  631. # print('getting dataset and computing kernel distance matrix first...')
  632. # ds_name = 'Letter-med'
  633. # gkernel = 'structuralspkernel'
  634. # Gn, y_all, graph_dir = get_dataset(ds_name)
  635. ## Gn = Gn[0:50]
  636. ## y_all = y_all[0:50]
  637. #
  638. # # compute/read Gram matrix and pair distances.
  639. # Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
  640. # np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
  641. # Kmatrix=Kmatrix)
  642. ## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
  643. ## Kmatrix = gmfile['Kmatrix']
  644. ## run_time = gmfile['run_time']
  645. ## Kmatrix = Kmatrix[[0,1,2,3,4],:]
  646. ## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
  647. ## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
  648. # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
  649. # Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
  650. ## Kmatrix = np.zeros((len(Gn), len(Gn)))
  651. ## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
  652. #
  653. # # fitting and computing.
  654. # fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
  655. # for fit_method in fit_methods:
  656. # print('\n-------------------------------------')
  657. # print('fit method:', fit_method)
  658. # parameters = {'ds_name': ds_name,
  659. # 'gkernel': gkernel,
  660. # 'edit_cost_name': 'LETTER2',
  661. # 'ged_method': 'mIPFP',
  662. # 'attr_distance': 'euclidean',
  663. # 'fit_method': fit_method,
  664. # 'init_ecc': [0.525, 0.525, 0.75, 0.475, 0.475]}
  665. # print('parameters: ', parameters)
  666. # xp_fit_method_for_non_symbolic(parameters, save_results=True,
  667. # initial_solutions=40,
  668. # Gn_data = [Gn, y_all, graph_dir],
  669. # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
  670. # Kmatrix=Kmatrix)
  671. # #### xp 7: Letter-low, sspkernel.
  672. # # load dataset.
  673. # print('getting dataset and computing kernel distance matrix first...')
  674. # ds_name = 'Letter-low'
  675. # gkernel = 'structuralspkernel'
  676. # Gn, y_all, graph_dir = get_dataset(ds_name)
  677. ## Gn = Gn[0:50]
  678. ## y_all = y_all[0:50]
  679. #
  680. # # compute/read Gram matrix and pair distances.
  681. # Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
  682. # np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
  683. # Kmatrix=Kmatrix)
  684. ## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
  685. ## Kmatrix = gmfile['Kmatrix']
  686. ## run_time = gmfile['run_time']
  687. ## Kmatrix = Kmatrix[[0,1,2,3,4],:]
  688. ## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
  689. ## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
  690. # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
  691. # Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
  692. ## Kmatrix = np.zeros((len(Gn), len(Gn)))
  693. ## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
  694. #
  695. # # fitting and computing.
  696. # fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
  697. # for fit_method in fit_methods:
  698. # print('\n-------------------------------------')
  699. # print('fit method:', fit_method)
  700. # parameters = {'ds_name': ds_name,
  701. # 'gkernel': gkernel,
  702. # 'edit_cost_name': 'LETTER2',
  703. # 'ged_method': 'mIPFP',
  704. # 'attr_distance': 'euclidean',
  705. # 'fit_method': fit_method,
  706. # 'init_ecc': [0.075, 0.075, 0.25, 0.075, 0.075]}
  707. # print('parameters: ', parameters)
  708. # xp_fit_method_for_non_symbolic(parameters, save_results=True,
  709. # initial_solutions=40,
  710. # Gn_data = [Gn, y_all, graph_dir],
  711. # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
  712. # Kmatrix=Kmatrix)
  713. # #### xp 8: Letter-med, spkernel.
  714. # # load dataset.
  715. # print('getting dataset and computing kernel distance matrix first...')
  716. # ds_name = 'Letter-med'
  717. # gkernel = 'spkernel'
  718. # Gn, y_all, graph_dir = get_dataset(ds_name)
  719. # # remove graphs without nodes and edges.
  720. # Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
  721. # and nx.number_of_edges(G) != 0)]
  722. # idx = [G[0] for G in Gn]
  723. # Gn = [G[1] for G in Gn]
  724. # y_all = [y_all[i] for i in idx]
  725. ## Gn = Gn[0:50]
  726. ## y_all = y_all[0:50]
  727. #
  728. # # compute/read Gram matrix and pair distances.
  729. # Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
  730. # np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
  731. # Kmatrix=Kmatrix)
  732. ## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
  733. ## Kmatrix = gmfile['Kmatrix']
  734. ## run_time = gmfile['run_time']
  735. ## Kmatrix = Kmatrix[[0,1,2,3,4],:]
  736. ## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
  737. ## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
  738. # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
  739. # Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
  740. ## Kmatrix = np.zeros((len(Gn), len(Gn)))
  741. ## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
  742. #
  743. # # fitting and computing.
  744. # fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
  745. # for fit_method in fit_methods:
  746. # print('\n-------------------------------------')
  747. # print('fit method:', fit_method)
  748. # parameters = {'ds_name': ds_name,
  749. # 'gkernel': gkernel,
  750. # 'edit_cost_name': 'LETTER2',
  751. # 'ged_method': 'mIPFP',
  752. # 'attr_distance': 'euclidean',
  753. # 'fit_method': fit_method,
  754. # 'init_ecc': [0.525, 0.525, 0.75, 0.475, 0.475]}
  755. # print('parameters: ', parameters)
  756. # xp_fit_method_for_non_symbolic(parameters, save_results=True,
  757. # initial_solutions=40,
  758. # Gn_data = [Gn, y_all, graph_dir],
  759. # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
  760. # Kmatrix=Kmatrix)
  761. # #### xp 9: Letter-low, spkernel.
  762. # # load dataset.
  763. # print('getting dataset and computing kernel distance matrix first...')
  764. # ds_name = 'Letter-low'
  765. # gkernel = 'spkernel'
  766. # Gn, y_all, graph_dir = get_dataset(ds_name)
  767. # # remove graphs without nodes and edges.
  768. # Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
  769. # and nx.number_of_edges(G) != 0)]
  770. # idx = [G[0] for G in Gn]
  771. # Gn = [G[1] for G in Gn]
  772. # y_all = [y_all[i] for i in idx]
  773. ## Gn = Gn[0:50]
  774. ## y_all = y_all[0:50]
  775. #
  776. # # compute/read Gram matrix and pair distances.
  777. # Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
  778. # np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
  779. # Kmatrix=Kmatrix)
  780. ## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
  781. ## Kmatrix = gmfile['Kmatrix']
  782. ## run_time = gmfile['run_time']
  783. ## Kmatrix = Kmatrix[[0,1,2,3,4],:]
  784. ## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
  785. ## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
  786. # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
  787. # Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
  788. ## Kmatrix = np.zeros((len(Gn), len(Gn)))
  789. ## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
  790. #
  791. # # fitting and computing.
  792. # fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
  793. # for fit_method in fit_methods:
  794. # print('\n-------------------------------------')
  795. # print('fit method:', fit_method)
  796. # parameters = {'ds_name': ds_name,
  797. # 'gkernel': gkernel,
  798. # 'edit_cost_name': 'LETTER2',
  799. # 'ged_method': 'mIPFP',
  800. # 'attr_distance': 'euclidean',
  801. # 'fit_method': fit_method,
  802. # 'init_ecc': [0.075, 0.075, 0.25, 0.075, 0.075]}
  803. # print('parameters: ', parameters)
  804. # xp_fit_method_for_non_symbolic(parameters, save_results=True,
  805. # initial_solutions=40,
  806. # Gn_data = [Gn, y_all, graph_dir],
  807. # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
  808. # Kmatrix=Kmatrix)
  809. #### xp 5: COIL-DEL, sspkernel, using LETTER2, only node attrs.
  810. # load dataset.
  811. print('getting dataset and computing kernel distance matrix first...')
  812. ds_name = 'COIL-DEL'
  813. gkernel = 'structuralspkernel'
  814. Gn, y_all, graph_dir = get_dataset(ds_name)
  815. # remove graphs without nodes and edges.
  816. Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 0]
  817. # and nx.number_of_edges(G) != 0)]
  818. idx = [G[0] for G in Gn]
  819. Gn = [G[1] for G in Gn]
  820. y_all = [y_all[i] for i in idx]
  821. # remove unused labels.
  822. for G in Gn:
  823. G.graph['edge_labels'] = []
  824. for edge in G.edges:
  825. del G.edges[edge]['bond_type']
  826. del G.edges[edge]['valence']
  827. # Gn = Gn[805:815]
  828. # y_all = y_all[805:815]
  829. for G in Gn:
  830. G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
  831. # compute/read Gram matrix and pair distances.
  832. is_separate = True
  833. Kmatrix, run_time, k_dis_data = compute_gm_for_each_class(Gn,
  834. y_all,
  835. gkernel,
  836. parallel='imap_unordered',
  837. is_separate=is_separate)
  838. # Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
  839. # np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
  840. # Kmatrix=Kmatrix)
  841. # gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
  842. # Kmatrix = gmfile['Kmatrix']
  843. # run_time = gmfile['run_time']
  844. # Kmatrix = Kmatrix[[0,1,2,3,4],:]
  845. # Kmatrix = Kmatrix[:,[0,1,2,3,4]]
  846. # print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
  847. # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
  848. # Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
  849. # Kmatrix = np.zeros((len(Gn), len(Gn)))
  850. # dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
  851. # fitting and computing.
  852. fit_methods = ['k-graphs', 'random', 'random', 'random']
  853. for fit_method in fit_methods:
  854. print('\n-------------------------------------')
  855. print('fit method:', fit_method)
  856. parameters = {'ds_name': ds_name,
  857. 'gkernel': gkernel,
  858. 'edit_cost_name': 'LETTER2',
  859. 'ged_method': 'mIPFP',
  860. 'attr_distance': 'euclidean',
  861. 'fit_method': fit_method,
  862. 'init_ecc': [3,3,1,3,3]} # [0.525, 0.525, 0.001, 0.125, 0.125]}
  863. xp_fit_method_for_non_symbolic(parameters, save_results=True,
  864. initial_solutions=40,
  865. Gn_data=[Gn, y_all, graph_dir],
  866. k_dis_data=k_dis_data,
  867. Kmatrix=Kmatrix,
  868. is_separate=is_separate)

A Python package for graph kernels, graph edit distances and the graph pre-image problem.