|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Thu Dec 19 17:16:23 2019
-
- @author: ljia
- """
- import numpy as np
- from sklearn.manifold import TSNE, Isomap
- import matplotlib.pyplot as plt
- from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset
- from tqdm import tqdm
-
- from gklearn.utils.graphfiles import loadDataset, loadGXL
- from gklearn.preimage.utils import kernel_distance_matrix, compute_kernel, dis_gstar, get_same_item_indices
-
-
- def visualize_graph_dataset(dis_measure, visual_method, draw_figure,
- draw_params={}, dis_mat=None, Gn=None,
- median_set=None):
-
-
- def draw_zoomed_axes(Gn_embedded, ax):
- margin = 0.01
- if dis_measure == 'graph-kernel':
- index = -2
- elif dis_measure == 'ged':
- index = -1
- x1 = np.min(Gn_embedded[median_set + [index], 0]) - margin * np.max(Gn_embedded)
- x2 = np.max(Gn_embedded[median_set + [index], 0]) + margin * np.max(Gn_embedded)
- y1 = np.min(Gn_embedded[median_set + [index], 1]) - margin * np.max(Gn_embedded)
- y2 = np.max(Gn_embedded[median_set + [index], 1]) + margin * np.max(Gn_embedded)
- if (x1 < 0 and y1 < 0) or ((x1 > 0 and y1 > 0)):
- loc = 2
- else:
- loc = 3
- axins = zoomed_inset_axes(ax, 4, loc=loc) # zoom-factor: 2.5, location: upper-left
- draw_figure(axins, Gn_embedded, dis_measure=dis_measure,
- median_set=median_set, **draw_params)
- axins.set_xlim(x1, x2) # apply the x-limits
- axins.set_ylim(y1, y2) # apply the y-limits
- plt.yticks(visible=False)
- plt.xticks(visible=False)
- loc1 = 1 if loc == 2 else 3
- mark_inset(ax, axins, loc1=2, loc2=4, fc="none", ec="0.5")
-
-
- if dis_mat is None:
- if dis_measure == 'graph-kernel':
- gkernel = 'untilhpathkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
- dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label,
- Kmatrix=None, gkernel=gkernel)
- elif dis_measure == 'ged':
- pass
-
- if visual_method == 'tsne':
- Gn_embedded = TSNE(n_components=2, metric='precomputed').fit_transform(dis_mat)
- elif visual_method == 'isomap':
- Gn_embedded = Isomap(n_components=2, metric='precomputed').fit_transform(dis_mat)
- print(Gn_embedded.shape)
- fig, ax = plt.subplots()
- draw_figure(plt, Gn_embedded, dis_measure=dis_measure, legend=True,
- median_set=median_set, **draw_params)
- # draw_zoomed_axes(Gn_embedded, ax)
- plt.show()
- plt.clf()
-
- return
-
-
- def draw_figure(ax, Gn_embedded, dis_measure=None, y_idx=None, legend=False,
- median_set=None):
- from matplotlib import colors as mcolors
- colors = list(dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS))
- # colors = ['#08306b', '#08519c', '#2171b5', '#4292c6', '#6baed6', '#9ecae1',
- # '#c6dbef', '#deebf7']
- # for i, values in enumerate(y_idx.values()):
- # for item in values:
- ## ax.scatter(Gn_embedded[item,0], Gn_embedded[item,1], c=colors[i]) # , c='b')
- # ax.scatter(Gn_embedded[item,0], Gn_embedded[item,1], c='b')
- # ax.scatter(Gn_embedded[:,0], Gn_embedded[:,1], c='b')
- h1 = ax.scatter(Gn_embedded[median_set, 0], Gn_embedded[median_set, 1], c='b')
- if dis_measure == 'graph-kernel':
- h2 = ax.scatter(Gn_embedded[-1, 0], Gn_embedded[-1, 1], c='darkorchid') # \psi
- h3 = ax.scatter(Gn_embedded[-2, 0], Gn_embedded[-2, 1], c='gold') # gen median
- h4 = ax.scatter(Gn_embedded[-3, 0], Gn_embedded[-3, 1], c='r') #c='g', marker='+') # set median
- elif dis_measure == 'ged':
- h3 = ax.scatter(Gn_embedded[-1, 0], Gn_embedded[-1, 1], c='gold') # gen median
- h4 = ax.scatter(Gn_embedded[-2, 0], Gn_embedded[-2, 1], c='r') #c='g', marker='+') # set median
- if legend:
- # fig.subplots_adjust(bottom=0.17)
- if dis_measure == 'graph-kernel':
- ax.legend([h1, h2, h3, h4],
- ['k closest graphs', 'true median', 'gen median', 'set median'])
- elif dis_measure == 'ged':
- ax.legend([h1, h3, h4], ['k closest graphs', 'gen median', 'set median'])
- # fig.legend(handles, labels, loc='lower center', ncol=2, frameon=False) # , ncol=5, labelspacing=0.1, handletextpad=0.4, columnspacing=0.6)
- # plt.savefig('symbolic_and_non_comparison_vertical_short.eps', format='eps', dpi=300, transparent=True,
- # bbox_inches='tight')
- # plt.show()
-
-
- ###############################################################################
-
- def visualize_distances_in_kernel():
-
- ds = {'name': 'monoterpenoides',
- 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'])
- # Gn = Gn[0:50]
- fname_medians = 'expert.treelet'
- # add set median.
- fname_sm = 'results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl'
- set_median = loadGXL(fname_sm)
- Gn.append(set_median)
- # add generalized median (estimated pre-image.)
- fname_gm = 'results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl'
- gen_median = loadGXL(fname_gm)
- Gn.append(gen_median)
-
- # compute distance matrix
- median_set = [22, 29, 54, 74]
- gkernel = 'treeletkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
- Gn_median_set = [Gn[i].copy() for i in median_set]
- Kmatrix_median = compute_kernel(Gn + Gn_median_set, gkernel, node_label,
- edge_label, True)
- Kmatrix = Kmatrix_median[0:len(Gn), 0:len(Gn)]
- dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label,
- Kmatrix=Kmatrix, gkernel=gkernel)
- print('average distances: ', np.mean(np.mean(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
- print('min distances: ', np.min(np.min(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
- print('max distances: ', np.max(np.max(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
-
- # add distances for the image of exact median \psi.
- dis_k_median_list = []
- for idx, g in enumerate(Gn):
- dis_k_median_list.append(dis_gstar(idx, range(len(Gn), len(Gn) + len(Gn_median_set)),
- [1 / len(Gn_median_set)] * len(Gn_median_set),
- Kmatrix_median, withterm3=False))
- dis_mat_median = np.zeros((len(Gn) + 1, len(Gn) + 1))
- for i in range(len(Gn)):
- for j in range(i, len(Gn)):
- dis_mat_median[i, j] = dis_mat[i, j]
- dis_mat_median[j, i] = dis_mat_median[i, j]
- for i in range(len(Gn)):
- dis_mat_median[i, -1] = dis_k_median_list[i]
- dis_mat_median[-1, i] = dis_k_median_list[i]
-
- # get indices by classes.
- y_idx = get_same_item_indices(y_all)
-
- # visualization.
- # visualize_graph_dataset('graph-kernel', 'tsne', Gn)
- # visualize_graph_dataset('graph-kernel', 'tsne', draw_figure,
- # draw_params={'y_idx': y_idx}, dis_mat=dis_mat_median)
- visualize_graph_dataset('graph-kernel', 'tsne', draw_figure,
- draw_params={'y_idx': y_idx}, dis_mat=dis_mat_median,
- median_set=median_set)
-
-
- def visualize_distances_in_ged():
- from gklearn.preimage.fitDistance import compute_geds
- from gklearn.preimage.ged import GED
- ds = {'name': 'monoterpenoides',
- 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'])
- # Gn = Gn[0:50]
- # add set median.
- fname_medians = 'expert.treelet'
- fname_sm = 'preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl'
- set_median = loadGXL(fname_sm)
- Gn.append(set_median)
- # add generalized median (estimated pre-image.)
- fname_gm = 'preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl'
- gen_median = loadGXL(fname_gm)
- Gn.append(gen_median)
-
- # compute/load ged matrix.
- # # compute.
- ## k = 4
- ## edit_costs = [0.16229209837639536, 0.06612870523413916, 0.04030113378793905, 0.20723547009415202, 0.3338607220394598, 0.27054392518077297]
- # edit_costs = [3, 3, 1, 3, 3, 1]
- ## edit_costs = [7, 3, 5, 9, 2, 6]
- # algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
- # params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
- # 'algo_options': algo_options, 'stabilizer': None,
- # 'edit_cost_constant': edit_costs}
- # _, ged_mat, _ = compute_geds(Gn, params_ged=params_ged, parallel=True)
- # np.savez('results/test_k_closest_graphs/ged_mat.' + fname_medians + '.with_medians.gm', ged_mat=ged_mat)
- # load from file.
- gmfile = np.load('results/test_k_closest_graphs/ged_mat.' + fname_medians + '.with_medians.gm.npz')
- ged_mat = gmfile['ged_mat']
- # # change medians.
- # edit_costs = [3, 3, 1, 3, 3, 1]
- # algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
- # params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
- # 'algo_options': algo_options, 'stabilizer': None,
- # 'edit_cost_constant': edit_costs}
- # for idx in tqdm(range(len(Gn) - 2), desc='computing GEDs', file=sys.stdout):
- # dis, _, _ = GED(Gn[idx], set_median, **params_ged)
- # ged_mat[idx, -2] = dis
- # ged_mat[-2, idx] = dis
- # dis, _, _ = GED(Gn[idx], gen_median, **params_ged)
- # ged_mat[idx, -1] = dis
- # ged_mat[-1, idx] = dis
- # np.savez('results/test_k_closest_graphs/ged_mat.' + fname_medians + '.with_medians.gm',
- # ged_mat=ged_mat)
-
-
- # get indices by classes.
- y_idx = get_same_item_indices(y_all)
-
- # visualization.
- median_set = [22, 29, 54, 74]
- visualize_graph_dataset('ged', 'tsne', draw_figure,
- draw_params={'y_idx': y_idx}, dis_mat=ged_mat,
- median_set=median_set)
-
- ###############################################################################
-
-
- def visualize_distances_in_kernel_monoterpenoides():
- import os
-
- ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds',
- 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'} # node/edge symb
- Gn_original, y_all = loadDataset(ds['dataset'])
- # Gn = Gn[0:50]
-
- # compute distance matrix
- # median_set = [22, 29, 54, 74]
- gkernel = 'treeletkernel'
- fit_method = 'expert'
- node_label = 'atom'
- edge_label = 'bond_type'
- ds_name = 'monoterpenoides'
- fname_medians = fit_method + '.' + gkernel
- dir_output = 'results/xp_monoterpenoides/'
- repeat = 0
-
- # get indices by classes.
- y_idx = get_same_item_indices(y_all)
- for i, (y, values) in enumerate(y_idx.items()):
- print('\ny =', y)
- k = len(values)
-
- Gn = [Gn_original[g].copy() for g in values]
- # add set median.
- fname_sm = dir_output + 'medians/' + str(int(y)) + '/set_median.k' + str(int(k)) \
- + '.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
- set_median = loadGXL(fname_sm)
- Gn.append(set_median)
- # add generalized median (estimated pre-image.)
- fname_gm = dir_output + 'medians/' + str(int(y)) + '/gen_median.k' + str(int(k)) \
- + '.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
- gen_median = loadGXL(fname_gm)
- Gn.append(gen_median)
-
- # compute distance matrix
- median_set = range(0, len(values))
-
- Gn_median_set = [Gn[i].copy() for i in median_set]
- Kmatrix_median = compute_kernel(Gn + Gn_median_set, gkernel, node_label,
- edge_label, False)
- Kmatrix = Kmatrix_median[0:len(Gn), 0:len(Gn)]
- dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label,
- Kmatrix=Kmatrix, gkernel=gkernel)
- print('average distances: ', np.mean(np.mean(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
- print('min distances: ', np.min(np.min(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
- print('max distances: ', np.max(np.max(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
-
- # add distances for the image of exact median \psi.
- dis_k_median_list = []
- for idx, g in enumerate(Gn):
- dis_k_median_list.append(dis_gstar(idx, range(len(Gn), len(Gn) + len(Gn_median_set)),
- [1 / len(Gn_median_set)] * len(Gn_median_set),
- Kmatrix_median, withterm3=False))
- dis_mat_median = np.zeros((len(Gn) + 1, len(Gn) + 1))
- for i in range(len(Gn)):
- for j in range(i, len(Gn)):
- dis_mat_median[i, j] = dis_mat[i, j]
- dis_mat_median[j, i] = dis_mat_median[i, j]
- for i in range(len(Gn)):
- dis_mat_median[i, -1] = dis_k_median_list[i]
- dis_mat_median[-1, i] = dis_k_median_list[i]
-
-
- # visualization.
- # visualize_graph_dataset('graph-kernel', 'tsne', Gn)
- # visualize_graph_dataset('graph-kernel', 'tsne', draw_figure,
- # draw_params={'y_idx': y_idx}, dis_mat=dis_mat_median)
- visualize_graph_dataset('graph-kernel', 'tsne', draw_figure,
- draw_params={'y_idx': y_idx}, dis_mat=dis_mat_median,
- median_set=median_set)
-
-
- def visualize_distances_in_ged_monoterpenoides():
- from gklearn.preimage.fitDistance import compute_geds
- from gklearn.preimage.ged import GED
- import os
-
- ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds',
- 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'} # node/edge symb
- Gn_original, y_all = loadDataset(ds['dataset'])
- # Gn = Gn[0:50]
-
- # compute distance matrix
- # median_set = [22, 29, 54, 74]
- gkernel = 'treeletkernel'
- fit_method = 'expert'
- ds_name = 'monoterpenoides'
- fname_medians = fit_method + '.' + gkernel
- dir_output = 'results/xp_monoterpenoides/'
- repeat = 0
- # edit_costs = [0.16229209837639536, 0.06612870523413916, 0.04030113378793905, 0.20723547009415202, 0.3338607220394598, 0.27054392518077297]
- edit_costs = [3, 3, 1, 3, 3, 1]
- # edit_costs = [7, 3, 5, 9, 2, 6]
-
- # get indices by classes.
- y_idx = get_same_item_indices(y_all)
- for i, (y, values) in enumerate(y_idx.items()):
- print('\ny =', y)
- k = len(values)
-
- Gn = [Gn_original[g].copy() for g in values]
- # add set median.
- fname_sm = dir_output + 'medians/' + str(int(y)) + '/set_median.k' + str(int(k)) \
- + '.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
- set_median = loadGXL(fname_sm)
- Gn.append(set_median)
- # add generalized median (estimated pre-image.)
- fname_gm = dir_output + 'medians/' + str(int(y)) + '/gen_median.k' + str(int(k)) \
- + '.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
- gen_median = loadGXL(fname_gm)
- Gn.append(gen_median)
-
-
- # compute/load ged matrix.
- # compute.
- algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
- params_ged = {'dataset': ds_name, 'lib': 'gedlibpy', 'cost': 'CONSTANT',
- 'method': 'IPFP', 'algo_options': algo_options,
- 'stabilizer': None, 'edit_cost_constant': edit_costs}
- _, ged_mat, _ = compute_geds(Gn, params_ged=params_ged, parallel=True)
- np.savez(dir_output + 'ged_mat.' + fname_medians + '.y' + str(int(y)) \
- + '.with_medians.gm', ged_mat=ged_mat)
- # # load from file.
- # gmfile = np.load('dir_output + 'ged_mat.' + fname_medians + '.y' + str(int(y)) + '.with_medians.gm.npz')
- # ged_mat = gmfile['ged_mat']
- # # change medians.
- # algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
- # params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
- # 'algo_options': algo_options, 'stabilizer': None,
- # 'edit_cost_constant': edit_costs}
- # for idx in tqdm(range(len(Gn) - 2), desc='computing GEDs', file=sys.stdout):
- # dis, _, _ = GED(Gn[idx], set_median, **params_ged)
- # ged_mat[idx, -2] = dis
- # ged_mat[-2, idx] = dis
- # dis, _, _ = GED(Gn[idx], gen_median, **params_ged)
- # ged_mat[idx, -1] = dis
- # ged_mat[-1, idx] = dis
- # np.savez(dir_output + 'ged_mat.' + fname_medians + '.y' + str(int(y)) + '.with_medians.gm',
- # ged_mat=ged_mat)
-
- # visualization.
- median_set = range(0, len(values))
- visualize_graph_dataset('ged', 'tsne', draw_figure,
- draw_params={'y_idx': y_idx}, dis_mat=ged_mat,
- median_set=median_set)
-
-
- ###############################################################################
-
-
- def visualize_distances_in_kernel_letter_h():
-
- ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
- 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
- Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
- # Gn = Gn[0:50]
-
- # compute distance matrix
- # median_set = [22, 29, 54, 74]
- gkernel = 'structuralspkernel'
- fit_method = 'expert'
- node_label = None
- edge_label = None
- ds_name = 'letter-h'
- fname_medians = fit_method + '.' + gkernel
- dir_output = 'results/xp_letter_h/'
- k = 150
- repeat = 0
-
- # get indices by classes.
- y_idx = get_same_item_indices(y_all)
- for i, (y, values) in enumerate(y_idx.items()):
- print('\ny =', y)
-
- Gn = [Gn_original[g].copy() for g in values]
- # add set median.
- fname_sm = dir_output + 'medians/' + y + '/set_median.k' + str(int(k)) \
- + '.y' + y + '.repeat' + str(repeat) + '.gxl'
- set_median = loadGXL(fname_sm)
- Gn.append(set_median)
- # add generalized median (estimated pre-image.)
- fname_gm = dir_output + 'medians/' + y + '/gen_median.k' + str(int(k)) \
- + '.y' + y + '.repeat' + str(repeat) + '.gxl'
- gen_median = loadGXL(fname_gm)
- Gn.append(gen_median)
-
- # compute distance matrix
- median_set = range(0, len(values))
-
- Gn_median_set = [Gn[i].copy() for i in median_set]
- Kmatrix_median = compute_kernel(Gn + Gn_median_set, gkernel, node_label,
- edge_label, False)
- Kmatrix = Kmatrix_median[0:len(Gn), 0:len(Gn)]
- dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label,
- Kmatrix=Kmatrix, gkernel=gkernel)
- print('average distances: ', np.mean(np.mean(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
- print('min distances: ', np.min(np.min(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
- print('max distances: ', np.max(np.max(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
-
- # add distances for the image of exact median \psi.
- dis_k_median_list = []
- for idx, g in enumerate(Gn):
- dis_k_median_list.append(dis_gstar(idx, range(len(Gn), len(Gn) + len(Gn_median_set)),
- [1 / len(Gn_median_set)] * len(Gn_median_set),
- Kmatrix_median, withterm3=False))
- dis_mat_median = np.zeros((len(Gn) + 1, len(Gn) + 1))
- for i in range(len(Gn)):
- for j in range(i, len(Gn)):
- dis_mat_median[i, j] = dis_mat[i, j]
- dis_mat_median[j, i] = dis_mat_median[i, j]
- for i in range(len(Gn)):
- dis_mat_median[i, -1] = dis_k_median_list[i]
- dis_mat_median[-1, i] = dis_k_median_list[i]
-
-
- # visualization.
- # visualize_graph_dataset('graph-kernel', 'tsne', Gn)
- # visualize_graph_dataset('graph-kernel', 'tsne', draw_figure,
- # draw_params={'y_idx': y_idx}, dis_mat=dis_mat_median)
- visualize_graph_dataset('graph-kernel', 'tsne', draw_figure,
- draw_params={'y_idx': y_idx}, dis_mat=dis_mat_median,
- median_set=median_set)
-
-
- def visualize_distances_in_ged_letter_h():
- from fitDistance import compute_geds
- from preimage.test_k_closest_graphs import reform_attributes
-
- ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
- 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
- Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
- # Gn = Gn[0:50]
-
- # compute distance matrix
- # median_set = [22, 29, 54, 74]
- gkernel = 'structuralspkernel'
- fit_method = 'expert'
- ds_name = 'letter-h'
- fname_medians = fit_method + '.' + gkernel
- dir_output = 'results/xp_letter_h/'
- k = 150
- repeat = 0
- # edit_costs = [0.16229209837639536, 0.06612870523413916, 0.04030113378793905, 0.20723547009415202, 0.3338607220394598, 0.27054392518077297]
- edit_costs = [3, 3, 1, 3, 3, 1]
- # edit_costs = [7, 3, 5, 9, 2, 6]
-
- # get indices by classes.
- y_idx = get_same_item_indices(y_all)
- for i, (y, values) in enumerate(y_idx.items()):
- print('\ny =', y)
-
- Gn = [Gn_original[g].copy() for g in values]
- # add set median.
- fname_sm = dir_output + 'medians/' + y + '/set_median.k' + str(int(k)) \
- + '.y' + y + '.repeat' + str(repeat) + '.gxl'
- set_median = loadGXL(fname_sm)
- Gn.append(set_median)
- # add generalized median (estimated pre-image.)
- fname_gm = dir_output + 'medians/' + y + '/gen_median.k' + str(int(k)) \
- + '.y' + y + '.repeat' + str(repeat) + '.gxl'
- gen_median = loadGXL(fname_gm)
- Gn.append(gen_median)
-
-
- # compute/load ged matrix.
- # compute.
- algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
- params_ged = {'dataset': 'Letter', 'lib': 'gedlibpy', 'cost': 'CONSTANT',
- 'method': 'IPFP', 'algo_options': algo_options,
- 'stabilizer': None, 'edit_cost_constant': edit_costs}
- for g in Gn:
- reform_attributes(g)
- _, ged_mat, _ = compute_geds(Gn, params_ged=params_ged, parallel=True)
- np.savez(dir_output + 'ged_mat.' + fname_medians + '.y' + y + '.with_medians.gm', ged_mat=ged_mat)
- # # load from file.
- # gmfile = np.load('dir_output + 'ged_mat.' + fname_medians + '.y' + y + '.with_medians.gm.npz')
- # ged_mat = gmfile['ged_mat']
- # # change medians.
- # algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
- # params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
- # 'algo_options': algo_options, 'stabilizer': None,
- # 'edit_cost_constant': edit_costs}
- # for idx in tqdm(range(len(Gn) - 2), desc='computing GEDs', file=sys.stdout):
- # dis, _, _ = GED(Gn[idx], set_median, **params_ged)
- # ged_mat[idx, -2] = dis
- # ged_mat[-2, idx] = dis
- # dis, _, _ = GED(Gn[idx], gen_median, **params_ged)
- # ged_mat[idx, -1] = dis
- # ged_mat[-1, idx] = dis
- # np.savez(dir_output + 'ged_mat.' + fname_medians + '.y' + y + '.with_medians.gm',
- # ged_mat=ged_mat)
-
-
- # visualization.
- median_set = range(0, len(values))
- visualize_graph_dataset('ged', 'tsne', draw_figure,
- draw_params={'y_idx': y_idx}, dis_mat=ged_mat,
- median_set=median_set)
-
-
- if __name__ == '__main__':
- visualize_distances_in_kernel_letter_h()
- # visualize_distances_in_ged_letter_h()
- # visualize_distances_in_kernel_monoterpenoides()
- # visualize_distances_in_kernel_monoterpenoides()
- # visualize_distances_in_kernel()
- # visualize_distances_in_ged()
-
-
-
-
-
-
-
- #def draw_figure_dis_k(ax, Gn_embedded, y_idx=None, legend=False):
- # from matplotlib import colors as mcolors
- # colors = list(dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS))
- ## colors = ['#08306b', '#08519c', '#2171b5', '#4292c6', '#6baed6', '#9ecae1',
- ## '#c6dbef', '#deebf7']
- # for i, values in enumerate(y_idx.values()):
- # for item in values:
- ## ax.scatter(Gn_embedded[item,0], Gn_embedded[item,1], c=colors[i]) # , c='b')
- # ax.scatter(Gn_embedded[item,0], Gn_embedded[item,1], c='b')
- # h1 = ax.scatter(Gn_embedded[[12, 13, 22, 29], 0], Gn_embedded[[12, 13, 22, 29], 1], c='r')
- # h2 = ax.scatter(Gn_embedded[-1, 0], Gn_embedded[-1, 1], c='darkorchid') # \psi
- # h3 = ax.scatter(Gn_embedded[-2, 0], Gn_embedded[-2, 1], c='gold') # gen median
- # h4 = ax.scatter(Gn_embedded[-3, 0], Gn_embedded[-3, 1], c='r', marker='+') # set median
- # if legend:
- ## fig.subplots_adjust(bottom=0.17)
- # ax.legend([h1, h2, h3, h4], ['k clostest graphs', 'true median', 'gen median', 'set median'])
- ## fig.legend(handles, labels, loc='lower center', ncol=2, frameon=False) # , ncol=5, labelspacing=0.1, handletextpad=0.4, columnspacing=0.6)
- ## plt.savefig('symbolic_and_non_comparison_vertical_short.eps', format='eps', dpi=300, transparent=True,
- ## bbox_inches='tight')
- ## plt.show()
-
-
-
- #def draw_figure_ged(ax, Gn_embedded, y_idx=None, legend=False):
- # from matplotlib import colors as mcolors
- # colors = list(dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS))
- ## colors = ['#08306b', '#08519c', '#2171b5', '#4292c6', '#6baed6', '#9ecae1',
- ## '#c6dbef', '#deebf7']
- # for i, values in enumerate(y_idx.values()):
- # for item in values:
- ## ax.scatter(Gn_embedded[item,0], Gn_embedded[item,1], c=colors[i]) # , c='b')
- # ax.scatter(Gn_embedded[item,0], Gn_embedded[item,1], c='b')
- # h1 = ax.scatter(Gn_embedded[[12, 13, 22, 29], 0], Gn_embedded[[12, 13, 22, 29], 1], c='r')
- ## h2 = ax.scatter(Gn_embedded[-1, 0], Gn_embedded[-1, 1], c='darkorchid') # \psi
- # h3 = ax.scatter(Gn_embedded[-1, 0], Gn_embedded[-1, 1], c='gold') # gen median
- # h4 = ax.scatter(Gn_embedded[-2, 0], Gn_embedded[-2, 1], c='r', marker='+') # set median
- # if legend:
- ## fig.subplots_adjust(bottom=0.17)
- # ax.legend([h1, h3, h4], ['k clostest graphs', 'gen median', 'set median'])
- ## fig.legend(handles, labels, loc='lower center', ncol=2, frameon=False) # , ncol=5, labelspacing=0.1, handletextpad=0.4, columnspacing=0.6)
- ## plt.savefig('symbolic_and_non_comparison_vertical_short.eps', format='eps', dpi=300, transparent=True,
- ## bbox_inches='tight')
- ## plt.show()
|