|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Mon Jun 1 11:37:57 2020
-
- @author: ljia
- """
- import multiprocessing
- import numpy as np
- import networkx as nx
- import os
- from gklearn.utils.graphfiles import saveGXL
- from gklearn.preimage import RandomPreimageGenerator
- from gklearn.utils import Dataset
-
-
# Root directory under which the experiment writes its outputs. The path is
# relative to the current working directory, and the trailing slash is
# required because sub-paths are built by plain string concatenation.
dir_root = '../results/xp_random_preimage_generation/'
-
-
def __get_kernel_options(kernel_name):
    """Build the option dictionary of the graph kernel selected by ``kernel_name``.

    Parameters
    ----------
    kernel_name : str
        Either 'PathUpToH' or 'Marginalized'.

    Returns
    -------
    dict
        Kernel-specific options followed by the options shared by both
        kernels (parallel mode, number of jobs, normalization, verbosity).

    Raises
    ------
    ValueError
        If ``kernel_name`` is not one of the supported names.
    """
    if kernel_name == 'PathUpToH':
        kernel_options = {'name': 'PathUpToH',
                          'depth': 2,
                          'k_func': 'MinMax',
                          'compute_method': 'trie'}
    elif kernel_name == 'Marginalized':
        kernel_options = {'name': 'Marginalized',
                          'p_quit': 0.8,
                          'n_iteration': 7,
                          'remove_totters': False}
    else:
        raise ValueError("Unsupported kernel_name %r; expected 'PathUpToH' or 'Marginalized'."
                         % (kernel_name,))
    # Settings common to both kernels.
    kernel_options.update({'parallel': 'imap_unordered',
                           'n_jobs': multiprocessing.cpu_count(),
                           'normalize': True,
                           'verbose': 0})
    return kernel_options


def xp_random_preimage_generation(kernel_name):
    """
    Experiment similar to the one in Bakir's paper. A test to check if RandomPreimageGenerator class works correctly.

    For each alpha1 in ``np.linspace(0, 1, 11)`` the MUTAG dataset is loaded,
    the graphs at indices 1 and 6 are given the weights ``alpha1`` and
    ``1 - alpha1`` (all other graphs get weight 0), and a
    RandomPreimageGenerator is run with these weights. Per-alpha graphs are
    saved as GXL and EPS files; the collected distances are saved to an
    ``.npz`` file and plotted. Everything is written under
    ``dir_root + 'MUTAG.' + kernel_name + '/'``.

    The unnormalized Gram matrix is cached in that directory: it is written
    after the first run that had to compute it and reused by later runs.

    Parameters
    ----------
    kernel_name : str
        Graph kernel to use, either 'PathUpToH' or 'Marginalized'.

    Returns
    -------
    k_dis_datasets : list
        ``results['k_dis_dataset']`` of the generator, one entry per alpha1.
    k_dis_preimages : list
        ``results['k_dis_preimage']`` of the generator, one entry per alpha1.
    bests_from_dataset : list
        ``rpg.best_from_dataset`` graph, one entry per alpha1.
    preimages : list
        ``rpg.preimage`` graph, one entry per alpha1.

    Raises
    ------
    ValueError
        If ``kernel_name`` is not a supported kernel name.
    """
    # Validate the kernel first so that a typo fails immediately instead of
    # surfacing later as an unbound `kernel_options` variable.
    kernel_options = __get_kernel_options(kernel_name)

    # Parameters that do not depend on alpha1.
    ds_name = 'MUTAG'
    edge_required = True
    irrelevant_labels = {'edge_labels': ['label_0']}
    cut_range = None

    # Output directory and Gram matrix cache file.
    dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '/'
    os.makedirs(dir_save, exist_ok=True)
    gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'

    alpha1_list = np.linspace(0, 1, 11)
    k_dis_datasets = []
    k_dis_preimages = []
    preimages = []
    bests_from_dataset = []
    for alpha1 in alpha1_list:
        print('alpha1 =', alpha1, ':\n')
        # A fresh option dict per run; 'alphas' is filled in below.
        rpg_options = {'k': 5,
                       'r_max': 10,
                       'l': 500,
                       'alphas': None,
                       'parallel': True,
                       'verbose': 2}

        # create/get Gram matrix. The check is repeated every iteration
        # because the first run may be the one that creates the cache file.
        gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
        if gmfile_exist:
            # NOTE: allow_pickle=True can execute arbitrary code if the file
            # was tampered with; only load caches produced by this script.
            # The context manager closes the underlying file handle.
            with np.load(gm_fname, allow_pickle=True) as gmfile:
                gram_matrix_unnorm = gmfile['gram_matrix_unnorm']
                time_precompute_gm = gmfile['run_time']

        # 1. get dataset.
        # NOTE(review): reloaded on every iteration as in the original code;
        # could be hoisted if the generator is known not to modify it.
        print('1. getting dataset...')
        dataset_all = Dataset()
        dataset_all.load_predefined_dataset(ds_name)
        dataset_all.trim_dataset(edge_required=edge_required)
        if irrelevant_labels is not None:
            dataset_all.remove_labels(**irrelevant_labels)
        if cut_range is not None:
            dataset_all.cut_graphs(cut_range)

        # 2. initialize rpg and setting parameters.
        print('2. initializing rpg and setting parameters...')
        nb_graphs = len(dataset_all.graphs)
        # Only the graphs at indices 1 and 6 get a non-zero weight.
        alphas = [0] * nb_graphs
        alphas[1] = alpha1
        alphas[6] = 1 - alpha1
        rpg_options['alphas'] = alphas
        if gmfile_exist:
            rpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm
            rpg_options['runtime_precompute_gm'] = time_precompute_gm
        rpg = RandomPreimageGenerator()
        rpg.dataset = dataset_all
        rpg.set_options(**rpg_options.copy())
        rpg.kernel_options = kernel_options.copy()

        # 3. compute preimage.
        print('3. computing preimage...')
        rpg.run()
        results = rpg.get_results()
        k_dis_datasets.append(results['k_dis_dataset'])
        k_dis_preimages.append(results['k_dis_preimage'])
        bests_from_dataset.append(rpg.best_from_dataset)
        preimages.append(rpg.preimage)

        # 4. save results.
        # write Gram matrices to file (np.savez keeps the existing '.npz' suffix).
        if not gmfile_exist:
            np.savez(gm_fname, gram_matrix_unnorm=rpg.gram_matrix_unnorm,
                     run_time=results['runtime_precompute_gm'])

        # save graphs. The file tag is the first three characters of alpha1,
        # e.g. '0.3'.
        alpha_tag = 'alpha1_' + str(alpha1)[0:3]
        fn_best_dataset = dir_save + 'g_best_dataset.' + alpha_tag
        saveGXL(rpg.best_from_dataset, fn_best_dataset + '.gxl', method='default',
                node_labels=dataset_all.node_labels, edge_labels=dataset_all.edge_labels,
                node_attrs=dataset_all.node_attrs, edge_attrs=dataset_all.edge_attrs)
        fn_preimage = dir_save + 'g_preimage.' + alpha_tag
        saveGXL(rpg.preimage, fn_preimage + '.gxl', method='default',
                node_labels=dataset_all.node_labels, edge_labels=dataset_all.edge_labels,
                node_attrs=dataset_all.node_attrs, edge_attrs=dataset_all.edge_attrs)

        # draw graphs.
        __draw_graph(rpg.best_from_dataset, fn_best_dataset)
        __draw_graph(rpg.preimage, fn_preimage)

    # save distances.
    np.savez(dir_save + 'distances.' + ds_name + '.' + kernel_options['name'],
             k_dis_datasets=k_dis_datasets, k_dis_preimages=k_dis_preimages)

    # plot results figure.
    __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save)

    print('\ncomplete.\n')

    return k_dis_datasets, k_dis_preimages, bests_from_dataset, preimages
-
-
def __draw_graph(graph, file_prefix):
    """Draw ``graph`` with a spring layout and save it as ``file_prefix + '.eps'``.

    Nodes are captioned with the value of their 'label_0' attribute. The
    figure is cleared and closed after it has been written.

    Parameters
    ----------
    graph : networkx graph
        Graph to draw.
    file_prefix : str
        Output path without extension; '.eps' is appended.
    """
    import matplotlib.pyplot as plt

    plt.figure()
    layout = nx.spring_layout(graph)
    captions = nx.get_node_attributes(graph, 'label_0')
    draw_kwargs = {
        'node_size': 500,
        'labels': captions,
        'font_color': 'w',
        'width': 3,
        'with_labels': True,
    }
    nx.draw(graph, layout, **draw_kwargs)

    target = file_prefix + '.eps'
    plt.savefig(target, format='eps', dpi=300)

    # Release the figure so repeated calls do not accumulate open figures.
    plt.clf()
    plt.close()
-
-
def __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save):
    """Plot the distances obtained for each alpha and save the figure as EPS.

    ``k_dis_preimages`` is drawn as bars ('Reconstructed pre-image') and
    ``k_dis_datasets`` as an orange dotted line ('Nearest neighbor in $D_N$'),
    both against the positions ``0 .. len(alpha1_list) - 1``. The figure is
    written to ``dir_save + 'distances in kernel space.eps'`` and then shown.

    Parameters
    ----------
    alpha1_list : sequence
        Alpha values; the first three characters of ``str(value)`` are used
        as x tick labels.
    k_dis_datasets : sequence
        One value per alpha, plotted as the line.
    k_dis_preimages : sequence
        One value per alpha, plotted as the bars.
    dir_save : str
        Directory prefix of the output file; it must end with a path
        separator because the file name is appended by concatenation.
    """
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(1, 1, figsize=(7, 4.5))

    ind = np.arange(len(alpha1_list))  # the x locations for the groups
    width = 0.35  # the width of the bars

    ax.bar(ind, k_dis_preimages, width, label='Reconstructed pre-image', zorder=3, color='#133AAC')

    ax.set_xlabel(r'$\alpha \in [0,1]$')
    ax.set_ylabel(r'$d(g_i,g^\star(\alpha))$')
    ax.set_xticks(ind)
    ax.set_xticklabels([str(i)[0:3] for i in alpha1_list])
    ax.grid(axis='y', zorder=0)
    # Hide the frame and the tick marks; only the horizontal grid remains.
    for side in ('top', 'bottom', 'left', 'right'):
        ax.spines[side].set_visible(False)
    ax.xaxis.set_ticks_position('none')

    # The format string carries only marker and line style: the former 'b.-'
    # also specified a colour, which conflicted with color='orange' and made
    # matplotlib warn about a redundant colour definition.
    ax.plot(ind, k_dis_datasets, '.-', label=r'Nearest neighbor in $D_N$', color='orange', zorder=4)
    ax.yaxis.set_ticks_position('none')

    # Leave room below the axes for the figure-level legend.
    fig.subplots_adjust(bottom=.2)
    fig.legend(loc='lower center', ncol=2, frameon=False)

    fig.savefig(dir_save + 'distances in kernel space.eps', format='eps', dpi=300,
                transparent=True, bbox_inches='tight')
    plt.show()
    plt.close(fig)
-
-
if __name__ == '__main__':
    # Kernel used for this run; 'PathUpToH' is the other configured choice.
    kernel_name = 'Marginalized'
    # The results are kept in module-level names so they stay available for
    # inspection after the script has finished.
    (k_dis_datasets,
     k_dis_preimages,
     bests_from_dataset,
     preimages) = xp_random_preimage_generation(kernel_name)
-
- # # save graphs.
- # dir_save = dir_root + 'MUTAG.PathUpToH/'
- # for i, alpha1 in enumerate(np.linspace(0, 1, 11)):
- # fn_best_dataset = dir_save + 'g_best_dataset.' + 'alpha1_' + str(alpha1)[0:3]
- # saveGXL(bests_from_dataset[i], fn_best_dataset + '.gxl', method='default',
- # node_labels=['label_0'], edge_labels=[],
- # node_attrs=[], edge_attrs=[])
- # fn_preimage = dir_save + 'g_preimage.' + 'alpha1_' + str(alpha1)[0:3]
- # saveGXL(preimages[i], fn_preimage + '.gxl', method='default',
- # node_labels=['label_0'], edge_labels=[],
- # node_attrs=[], edge_attrs=[])
-
- # # draw graphs.
- # dir_save = dir_root + 'MUTAG.PathUpToH/'
- # for i, alpha1 in enumerate(np.linspace(0, 1, 11)):
- # fn_best_dataset = dir_save + 'g_best_dataset.' + 'alpha1_' + str(alpha1)[0:3]
- # __draw_graph(bests_from_dataset[i], fn_best_dataset)
- # fn_preimage = dir_save + 'g_preimage.' + 'alpha1_' + str(alpha1)[0:3]
- # __draw_graph(preimages[i], fn_preimage)
-
- # # plot results figure.
- # alpha1_list = np.linspace(0, 1, 11)
- # dir_save = dir_root + 'MUTAG.PathUpToH/'
- # __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save)
-
-
-
- # k_dis_datasets = [0.0,
- # 0.08882515554098754,
- # 0.17765031108197632,
- # 0.2664754666229643,
- # 0.35530062216395264,
- # 0.44412577770494066,
- # 0.35530062216395236,
- # 0.2664754666229643,
- # 0.17765031108197632,
- # 0.08882515554098878,
- # 0.0]
-
- # k_dis_preimages = [0.0,
- # 0.08882515554098754,
- # 0.17765031108197632,
- # 0.2664754666229643,
- # 0.35530062216395264,
- # 0.44412577770494066,
- # 0.35530062216395236,
- # 0.2664754666229643,
- # 0.17765031108197632,
- # 0.08882515554098878,
- # 0.0]
|