#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Thu Sep 5 15:59:00 2019 @author: ljia """ import numpy as np import networkx as nx import matplotlib.pyplot as plt import time import random #from tqdm import tqdm #import os import sys sys.path.insert(0, "../") from pygraph.utils.graphfiles import loadDataset #from pygraph.utils.logger2file import * from iam import iam_upgraded from utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar #from ged import ged_median def test_iam_monoterpenoides_with_init40(): gkernel = 'untilhpathkernel' node_label = 'atom' edge_label = 'bond_type' # unfitted edit costs. c_vi = 3 c_vr = 3 c_vs = 1 c_ei = 3 c_er = 3 c_es = 1 ite_max_iam = 50 epsilon_iam = 0.0001 removeNodes = False connected_iam = False # parameters for IAM function # ged_cost = 'CONSTANT' ged_cost = 'CONSTANT' ged_method = 'IPFP' edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es] ged_stabilizer = None # ged_repeat = 50 algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1' params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 'edit_cost_constant': edit_cost_constant, 'algo_options': algo_options, 'stabilizer': ged_stabilizer} collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/' graph_dir = collection_path + 'gxl/' y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] repeats = 50 # classify graphs according to classes. time_list = [] dis_ks_min_list = [] dis_ks_set_median_list = [] sod_gs_list = [] g_best = [] sod_set_median_list = [] sod_list_list = [] for y in y_all: print('\n-------------------------------------------------------') print('class of y:', y) time_list.append([]) dis_ks_min_list.append([]) dis_ks_set_median_list.append([]) sod_gs_list.append([]) g_best.append([]) sod_set_median_list.append([]) for repeat in range(repeats): # load median set. collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml' Gn_median, _ = loadDataset(collection_file, extra_params=graph_dir) Gn_candidate = [g.copy() for g in Gn_median] time0 = time.time() G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \ = iam_upgraded(Gn_median, Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam, epsilon=epsilon_iam, node_label=node_label, edge_label=edge_label, connected=connected_iam, removeNodes=removeNodes, params_ged=params_ged) time_total = time.time() - time0 print('\ntime: ', time_total) time_list[-1].append(time_total) g_best[-1].append(G_gen_median_list[0]) sod_set_median_list[-1].append(sod_set_median) print('\nsmallest sod of the set median:', sod_set_median) sod_gs_list[-1].append(sod_gen_median) print('\nsmallest sod in graph space:', sod_gen_median) sod_list_list.append(sod_list) # # show the best graph and save it to file. # print('one of the possible corresponding pre-images is') # nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'), # with_labels=True) ## plt.show() # # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) + ## plt.savefig('results/iam/paper_compare/monoter_y' + str(y_class) + ## '_repeat' + str(repeat) + '_' + str(time.time()) + ## '.png', format="PNG") # plt.clf() # # print(G_gen_median_list[0].nodes(data=True)) # # print(G_gen_median_list[0].edges(data=True)) print('\nsods of the set median for this class:', sod_set_median_list[-1]) print('\nsods in graph space for this class:', sod_gs_list[-1]) # print('\ndistance in kernel space of set median for this class:', # dis_ks_set_median_list[-1]) # print('\nsmallest distances in kernel space for this class:', # dis_ks_min_list[-1]) print('\ntimes for this class:', time_list[-1]) sod_set_median_list[-1] = np.mean(sod_set_median_list[-1]) sod_gs_list[-1] = np.mean(sod_gs_list[-1]) # dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1]) # dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1]) time_list[-1] = np.mean(time_list[-1]) print() print('\nmean sods of the set median for each class:', sod_set_median_list) print('\nmean sods in graph space for each class:', sod_gs_list) # print('\ndistances in kernel space of set median for each class:', # dis_ks_set_median_list) # print('\nmean smallest distances in kernel space for each class:', # dis_ks_min_list) print('\nmean times for each class:', time_list) print('\nmean sods of the set median of all:', np.mean(sod_set_median_list)) print('\nmean sods in graph space of all:', np.mean(sod_gs_list)) # print('\nmean distances in kernel space of set median of all:', # np.mean(dis_ks_set_median_list)) # print('\nmean smallest distances in kernel space of all:', # np.mean(dis_ks_min_list)) print('\nmean times of all:', np.mean(time_list)) def test_iam_monoterpenoides(): ds = {'name': 'monoterpenoides', 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb Gn, y_all = loadDataset(ds['dataset']) # Gn = Gn[0:50] gkernel = 'untilhpathkernel' node_label = 'atom' edge_label = 'bond_type' # parameters for GED function from the IAM paper. # fitted edit costs (Gaussian). c_vi = 0.03620133402089074 c_vr = 0.0417574590207099 c_vs = 0.009992282328587499 c_ei = 0.08293120042342755 c_er = 0.09512220476358019 c_es = 0.09222529696841467 # # fitted edit costs (linear combinations). # c_vi = 0.1749684054238749 # c_vr = 0.0734054228711457 # c_vs = 0.05017781726016715 # c_ei = 0.1869431164806936 # c_er = 0.32055856948274 # c_es = 0.2569469379247611 # # unfitted edit costs. # c_vi = 3 # c_vr = 3 # c_vs = 1 # c_ei = 3 # c_er = 3 # c_es = 1 ite_max_iam = 50 epsilon_iam = 0.001 removeNodes = False connected_iam = False # parameters for IAM function # ged_cost = 'CONSTANT' ged_cost = 'CONSTANT' ged_method = 'IPFP' edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es] # edit_cost_constant = [] ged_stabilizer = 'min' ged_repeat = 50 params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 'edit_cost_constant': edit_cost_constant, 'stabilizer': ged_stabilizer, 'repeat': ged_repeat} # classify graphs according to letters. time_list = [] dis_ks_min_list = [] dis_ks_set_median_list = [] sod_gs_list = [] g_best = [] sod_set_median_list = [] sod_list_list = [] idx_dict = get_same_item_indices(y_all) for y_class in idx_dict: print('\n-------------------------------------------------------') print('class of y:', y_class) Gn_class = [Gn[i].copy() for i in idx_dict[y_class]] time_list.append([]) dis_ks_min_list.append([]) dis_ks_set_median_list.append([]) sod_gs_list.append([]) g_best.append([]) sod_set_median_list.append([]) for repeat in range(50): idx_rdm = random.sample(range(len(Gn_class)), 10) print('graphs chosen:', idx_rdm) Gn_median = [Gn_class[idx].copy() for idx in idx_rdm] Gn_candidate = [g.copy() for g in Gn_median] alpha_range = [1 / len(Gn_median)] * len(Gn_median) time0 = time.time() G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \ = iam_upgraded(Gn_median, Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam, epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes, params_ged=params_ged) time_total = time.time() - time0 print('\ntime: ', time_total) time_list[-1].append(time_total) g_best[-1].append(G_gen_median_list[0]) sod_set_median_list[-1].append(sod_set_median) print('\nsmallest sod of the set median:', sod_set_median) sod_gs_list[-1].append(sod_gen_median) print('\nsmallest sod in graph space:', sod_gen_median) sod_list_list.append(sod_list) # show the best graph and save it to file. print('one of the possible corresponding pre-images is') nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'), with_labels=True) # plt.show() # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) + # plt.savefig('results/iam/paper_compare/monoter_y' + str(y_class) + # '_repeat' + str(repeat) + '_' + str(time.time()) + # '.png', format="PNG") plt.clf() # print(G_gen_median_list[0].nodes(data=True)) # print(G_gen_median_list[0].edges(data=True)) # compute distance between \psi and the set median graph. knew_set_median = compute_kernel(G_set_median_list + Gn_median, gkernel, node_label, edge_label, False) dhat_new_set_median_list = [] for idx, g_tmp in enumerate(G_set_median_list): # @todo: the term3 below could use the one at the beginning of the function. dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list), len(G_set_median_list) + len(Gn_median) + 1), alpha_range, knew_set_median, withterm3=False)) print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0]) dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0]) # compute distance between \psi and the new generated graphs. knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label, edge_label, False) dhat_new_list = [] for idx, g_tmp in enumerate(G_gen_median_list): # @todo: the term3 below could use the one at the beginning of the function. dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list), len(G_gen_median_list) + len(Gn_median) + 1), alpha_range, knew, withterm3=False)) print('\nsmallest distance in kernel space: ', dhat_new_list[0]) dis_ks_min_list[-1].append(dhat_new_list[0]) print('\nsods of the set median for this class:', sod_set_median_list[-1]) print('\nsods in graph space for this class:', sod_gs_list[-1]) print('\ndistance in kernel space of set median for this class:', dis_ks_set_median_list[-1]) print('\nsmallest distances in kernel space for this class:', dis_ks_min_list[-1]) print('\ntimes for this class:', time_list[-1]) sod_set_median_list[-1] = np.mean(sod_set_median_list[-1]) sod_gs_list[-1] = np.mean(sod_gs_list[-1]) dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1]) dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1]) time_list[-1] = np.mean(time_list[-1]) print() print('\nmean sods of the set median for each class:', sod_set_median_list) print('\nmean sods in graph space for each class:', sod_gs_list) print('\ndistances in kernel space of set median for each class:', dis_ks_set_median_list) print('\nmean smallest distances in kernel space for each class:', dis_ks_min_list) print('\nmean times for each class:', time_list) print('\nmean sods of the set median of all:', np.mean(sod_set_median_list)) print('\nmean sods in graph space of all:', np.mean(sod_gs_list)) print('\nmean distances in kernel space of set median of all:', np.mean(dis_ks_set_median_list)) print('\nmean smallest distances in kernel space of all:', np.mean(dis_ks_min_list)) print('\nmean times of all:', np.mean(time_list)) nb_better_sods = 0 nb_worse_sods = 0 nb_same_sods = 0 for sods in sod_list_list: if sods[0] > sods[-1]: nb_better_sods += 1 elif sods[0] < sods[-1]: nb_worse_sods += 1 else: nb_same_sods += 1 print('\n In', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods), 'are getting better,', str(nb_worse_sods), 'are getting worse,', str(nb_same_sods), 'are not changed; ', str(nb_better_sods / len(sod_list_list)), 'sods are improved.') def test_iam_mutag(): ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', 'extra_params': {}} # node/edge symb Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) # Gn = Gn[0:50] gkernel = 'untilhpathkernel' node_label = 'atom' edge_label = 'bond_type' # parameters for GED function from the IAM paper. # fitted edit costs. c_vi = 0.03523843108436513 c_vr = 0.03347339739350128 c_vs = 0.06871290673612238 c_ei = 0.08591999846720685 c_er = 0.07962086440894103 c_es = 0.08596855855478233 # unfitted edit costs. # c_vi = 3 # c_vr = 3 # c_vs = 1 # c_ei = 3 # c_er = 3 # c_es = 1 ite_max_iam = 50 epsilon_iam = 0.001 removeNodes = False connected_iam = False # parameters for IAM function # ged_cost = 'CONSTANT' ged_cost = 'CONSTANT' ged_method = 'IPFP' edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es] # edit_cost_constant = [] ged_stabilizer = 'min' ged_repeat = 50 params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 'edit_cost_constant': edit_cost_constant, 'stabilizer': ged_stabilizer, 'repeat': ged_repeat} # classify graphs according to letters. time_list = [] dis_ks_min_list = [] dis_ks_set_median_list = [] sod_gs_list = [] g_best = [] sod_set_median_list = [] sod_list_list = [] idx_dict = get_same_item_indices(y_all) for y_class in idx_dict: print('\n-------------------------------------------------------') print('class of y:', y_class) Gn_class = [Gn[i].copy() for i in idx_dict[y_class]] time_list.append([]) dis_ks_min_list.append([]) dis_ks_set_median_list.append([]) sod_gs_list.append([]) g_best.append([]) sod_set_median_list.append([]) for repeat in range(50): idx_rdm = random.sample(range(len(Gn_class)), 10) print('graphs chosen:', idx_rdm) Gn_median = [Gn_class[idx].copy() for idx in idx_rdm] Gn_candidate = [g.copy() for g in Gn_median] alpha_range = [1 / len(Gn_median)] * len(Gn_median) time0 = time.time() G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \ = iam_upgraded(Gn_median, Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam, epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes, params_ged=params_ged) time_total = time.time() - time0 print('\ntime: ', time_total) time_list[-1].append(time_total) g_best[-1].append(G_gen_median_list[0]) sod_set_median_list[-1].append(sod_set_median) print('\nsmallest sod of the set median:', sod_set_median) sod_gs_list[-1].append(sod_gen_median) print('\nsmallest sod in graph space:', sod_gen_median) sod_list_list.append(sod_list) # show the best graph and save it to file. print('one of the possible corresponding pre-images is') nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'), with_labels=True) # plt.show() # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) + # plt.savefig('results/iam/paper_compare/mutag_y' + str(y_class) + # '_repeat' + str(repeat) + '_' + str(time.time()) + # '.png', format="PNG") plt.clf() # print(G_gen_median_list[0].nodes(data=True)) # print(G_gen_median_list[0].edges(data=True)) # compute distance between \psi and the set median graph. knew_set_median = compute_kernel(G_set_median_list + Gn_median, gkernel, node_label, edge_label, False) dhat_new_set_median_list = [] for idx, g_tmp in enumerate(G_set_median_list): # @todo: the term3 below could use the one at the beginning of the function. dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list), len(G_set_median_list) + len(Gn_median) + 1), alpha_range, knew_set_median, withterm3=False)) print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0]) dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0]) # compute distance between \psi and the new generated graphs. knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label, edge_label, False) dhat_new_list = [] for idx, g_tmp in enumerate(G_gen_median_list): # @todo: the term3 below could use the one at the beginning of the function. dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list), len(G_gen_median_list) + len(Gn_median) + 1), alpha_range, knew, withterm3=False)) print('\nsmallest distance in kernel space: ', dhat_new_list[0]) dis_ks_min_list[-1].append(dhat_new_list[0]) print('\nsods of the set median for this class:', sod_set_median_list[-1]) print('\nsods in graph space for this class:', sod_gs_list[-1]) print('\ndistance in kernel space of set median for this class:', dis_ks_set_median_list[-1]) print('\nsmallest distances in kernel space for this class:', dis_ks_min_list[-1]) print('\ntimes for this class:', time_list[-1]) sod_set_median_list[-1] = np.mean(sod_set_median_list[-1]) sod_gs_list[-1] = np.mean(sod_gs_list[-1]) dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1]) dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1]) time_list[-1] = np.mean(time_list[-1]) print() print('\nmean sods of the set median for each class:', sod_set_median_list) print('\nmean sods in graph space for each class:', sod_gs_list) print('\ndistances in kernel space of set median for each class:', dis_ks_set_median_list) print('\nmean smallest distances in kernel space for each class:', dis_ks_min_list) print('\nmean times for each class:', time_list) print('\nmean sods of the set median of all:', np.mean(sod_set_median_list)) print('\nmean sods in graph space of all:', np.mean(sod_gs_list)) print('\nmean distances in kernel space of set median of all:', np.mean(dis_ks_set_median_list)) print('\nmean smallest distances in kernel space of all:', np.mean(dis_ks_min_list)) print('\nmean times of all:', np.mean(time_list)) nb_better_sods = 0 nb_worse_sods = 0 nb_same_sods = 0 for sods in sod_list_list: if sods[0] > sods[-1]: nb_better_sods += 1 elif sods[0] < sods[-1]: nb_worse_sods += 1 else: nb_same_sods += 1 print('\n In', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods), 'are getting better,', str(nb_worse_sods), 'are getting worse,', str(nb_same_sods), 'are not changed; ', str(nb_better_sods / len(sod_list_list)), 'sods are improved.') ############################################################################### # tests on different numbers of median-sets. def test_iam_median_nb(): ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', 'extra_params': {}} # node/edge symb Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) # Gn = Gn[0:50] remove_edges(Gn) gkernel = 'marginalizedkernel' lmbda = 0.03 # termination probalility # # parameters for GED function # c_vi = 0.037 # c_vr = 0.038 # c_vs = 0.075 # c_ei = 0.001 # c_er = 0.001 # c_es = 0.0 # ite_max_iam = 50 # epsilon_iam = 0.001 # removeNodes = False # connected_iam = False # # parameters for IAM function # ged_cost = 'CONSTANT' # ged_method = 'IPFP' # edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es] # ged_stabilizer = 'min' # ged_repeat = 50 # params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, # 'edit_cost_constant': edit_cost_constant, # 'stabilizer': ged_stabilizer, 'repeat': ged_repeat} # parameters for GED function c_vi = 4 c_vr = 4 c_vs = 2 c_ei = 1 c_er = 1 c_es = 1 ite_max_iam = 50 epsilon_iam = 0.001 removeNodes = False connected_iam = False # parameters for IAM function ged_cost = 'CHEM_1' ged_method = 'IPFP' edit_cost_constant = [] ged_stabilizer = 'min' ged_repeat = 50 params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 'edit_cost_constant': edit_cost_constant, 'stabilizer': ged_stabilizer, 'repeat': ged_repeat} # find out all the graphs classified to positive group 1. idx_dict = get_same_item_indices(y_all) Gn = [Gn[i] for i in idx_dict[1]] # number of graphs; we what to compute the median of these graphs. # nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] nb_median_range = [len(Gn)] # # compute Gram matrix. # time0 = time.time() # km = compute_kernel(Gn, gkernel, True) # time_km = time.time() - time0 # # write Gram matrix to file. # np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) time_list = [] dis_ks_min_list = [] sod_gs_list = [] # sod_gs_min_list = [] # nb_updated_list = [] # nb_updated_k_list = [] g_best = [] for nb_median in nb_median_range: print('\n-------------------------------------------------------') print('number of median graphs =', nb_median) random.seed(1) idx_rdm = random.sample(range(len(Gn)), nb_median) print('graphs chosen:', idx_rdm) Gn_median = [Gn[idx].copy() for idx in idx_rdm] Gn_candidate = [g.copy() for g in Gn] # for g in Gn_median: # nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) ## plt.savefig("results/preimage_mix/mutag.png", format="PNG") # plt.show() # plt.clf() ################################################################### # gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') # km_tmp = gmfile['gm'] # time_km = gmfile['gmtime'] # # modify mixed gram matrix. # km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) # for i in range(len(Gn)): # for j in range(i, len(Gn)): # km[i, j] = km_tmp[i, j] # km[j, i] = km[i, j] # for i in range(len(Gn)): # for j, idx in enumerate(idx_rdm): # km[i, len(Gn) + j] = km[i, idx] # km[len(Gn) + j, i] = km[i, idx] # for i, idx1 in enumerate(idx_rdm): # for j, idx2 in enumerate(idx_rdm): # km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] ################################################################### alpha_range = [1 / nb_median] * nb_median time0 = time.time() ghat_new_list, sod_min = iam_upgraded(Gn_median, Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam, epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes, params_ged=params_ged) time_total = time.time() - time0 print('\ntime: ', time_total) time_list.append(time_total) # compute distance between \psi and the new generated graphs. knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False) dhat_new_list = [] for idx, g_tmp in enumerate(ghat_new_list): # @todo: the term3 below could use the one at the beginning of the function. dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list), len(ghat_new_list) + len(Gn_median) + 1), alpha_range, knew, withterm3=False)) print('\nsmallest distance in kernel space: ', dhat_new_list[0]) dis_ks_min_list.append(dhat_new_list[0]) g_best.append(ghat_new_list[0]) # show the best graph and save it to file. # print('the shortest distance is', dhat) print('one of the possible corresponding pre-images is') nx.draw(ghat_new_list[0], labels=nx.get_node_attributes(ghat_new_list[0], 'atom'), with_labels=True) plt.show() # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) + plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) + '.png', format="PNG") plt.clf() # print(ghat_list[0].nodes(data=True)) # print(ghat_list[0].edges(data=True)) sod_gs_list.append(sod_min) # sod_gs_min_list.append(np.min(sod_min)) print('\nsmallest sod in graph space: ', sod_min) print('\nsods in graph space: ', sod_gs_list) # print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list) print('\nsmallest distance in kernel space for each set of median graphs: ', dis_ks_min_list) # print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', # nb_updated_list) # print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', # nb_updated_k_list) print('\ntimes:', time_list) def test_iam_letter_h(): from median import draw_Letter_graph ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', 'extra_params': {}} # node nsymb # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', # 'extra_params': {}} # node nsymb # Gn = Gn[0:50] Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) gkernel = 'structuralspkernel' # parameters for GED function from the IAM paper. c_vi = 3 c_vr = 3 c_vs = 1 c_ei = 3 c_er = 3 c_es = 1 ite_max_iam = 50 epsilon_iam = 0.001 removeNodes = False connected_iam = False # parameters for IAM function # ged_cost = 'CONSTANT' ged_cost = 'LETTER' ged_method = 'IPFP' # edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es] edit_cost_constant = [] ged_stabilizer = 'min' ged_repeat = 50 params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 'edit_cost_constant': edit_cost_constant, 'stabilizer': ged_stabilizer, 'repeat': ged_repeat} # classify graphs according to letters. time_list = [] dis_ks_min_list = [] sod_gs_list = [] g_best = [] sod_set_median_list = [] idx_dict = get_same_item_indices(y_all) for letter in idx_dict: print('\n-------------------------------------------------------') print('letter', letter) Gn_let = [Gn[i].copy() for i in idx_dict[letter]] time_list.append([]) dis_ks_min_list.append([]) sod_gs_list.append([]) g_best.append([]) sod_set_median_list.append([]) for repeat in range(50): idx_rdm = random.sample(range(len(Gn_let)), 50) print('graphs chosen:', idx_rdm) Gn_median = [Gn_let[idx].copy() for idx in idx_rdm] Gn_candidate = [g.copy() for g in Gn_median] alpha_range = [1 / len(Gn_median)] * len(Gn_median) time0 = time.time() ghat_new_list, sod_min, sod_set_median = iam_upgraded(Gn_median, Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam, epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes, params_ged=params_ged) time_total = time.time() - time0 print('\ntime: ', time_total) time_list[-1].append(time_total) g_best[-1].append(ghat_new_list[0]) sod_set_median_list[-1].append(sod_set_median) print('\nsmallest sod of the set median:', sod_set_median) sod_gs_list[-1].append(sod_min) print('\nsmallest sod in graph space:', sod_min) # show the best graph and save it to file. print('one of the possible corresponding pre-images is') draw_Letter_graph(ghat_new_list[0], savepath='results/iam/paper_compare/') # compute distance between \psi and the new generated graphs. knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False) dhat_new_list = [] for idx, g_tmp in enumerate(ghat_new_list): # @todo: the term3 below could use the one at the beginning of the function. dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list), len(ghat_new_list) + len(Gn_median) + 1), alpha_range, knew, withterm3=False)) print('\nsmallest distance in kernel space: ', dhat_new_list[0]) dis_ks_min_list[-1].append(dhat_new_list[0]) print('\nsods of the set median for this letter:', sod_set_median_list[-1]) print('\nsods in graph space for this letter:', sod_gs_list[-1]) print('\nsmallest distances in kernel space for this letter:', dis_ks_min_list[-1]) print('\ntimes for this letter:', time_list[-1]) sod_set_median_list[-1] = np.mean(sod_set_median_list[-1]) sod_gs_list[-1] = np.mean(sod_gs_list[-1]) dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1]) time_list[-1] = np.mean(time_list[-1]) print('\nmean sods of the set median for each letter:', sod_set_median_list) print('\nmean sods in graph space for each letter:', sod_gs_list) print('\nmean smallest distances in kernel space for each letter:', dis_ks_min_list) print('\nmean times for each letter:', time_list) print('\nmean sods of the set median of all:', np.mean(sod_set_median_list)) print('\nmean sods in graph space of all:', np.mean(sod_gs_list)) print('\nmean smallest distances in kernel space of all:', np.mean(dis_ks_min_list)) print('\nmean times of all:', np.mean(time_list)) def test_iam_fitdistance(): ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', 'extra_params': {}} # node/edge symb Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) # Gn = Gn[0:50] # remove_edges(Gn) gkernel = 'marginalizedkernel' node_label = 'atom' edge_label = 'bond_type' # lmbda = 0.03 # termination probalility # # parameters for GED function # c_vi = 0.037 # c_vr = 0.038 # c_vs = 0.075 # c_ei = 0.001 # c_er = 0.001 # c_es = 0.0 # ite_max_iam = 50 # epsilon_iam = 0.001 # removeNodes = False # connected_iam = False # # parameters for IAM function # ged_cost = 'CONSTANT' # ged_method = 'IPFP' # edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es] # ged_stabilizer = 'min' # ged_repeat = 50 # params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, # 'edit_cost_constant': edit_cost_constant, # 'stabilizer': ged_stabilizer, 'repeat': ged_repeat} # parameters for GED function c_vi = 4 c_vr = 4 c_vs = 2 c_ei = 1 c_er = 1 c_es = 1 ite_max_iam = 50 epsilon_iam = 0.001 removeNodes = False connected_iam = False # parameters for IAM function ged_cost = 'CHEM_1' ged_method = 'IPFP' edit_cost_constant = [] ged_stabilizer = 'min' ged_repeat = 50 params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 'edit_cost_constant': edit_cost_constant, 'stabilizer': ged_stabilizer, 'repeat': ged_repeat} # find out all the graphs classified to positive group 1. idx_dict = get_same_item_indices(y_all) Gn = [Gn[i] for i in idx_dict[1]] # number of graphs; we what to compute the median of these graphs. # nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] nb_median_range = [10] # # compute Gram matrix. # time0 = time.time() # km = compute_kernel(Gn, gkernel, True) # time_km = time.time() - time0 # # write Gram matrix to file. # np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) time_list = [] dis_ks_min_list = [] dis_ks_gen_median_list = [] sod_gs_list = [] # sod_gs_min_list = [] # nb_updated_list = [] # nb_updated_k_list = [] g_best = [] for nb_median in nb_median_range: print('\n-------------------------------------------------------') print('number of median graphs =', nb_median) random.seed(1) idx_rdm = random.sample(range(len(Gn)), nb_median) print('graphs chosen:', idx_rdm) Gn_median = [Gn[idx].copy() for idx in idx_rdm] Gn_candidate = [g.copy() for g in Gn_median] # for g in Gn_median: # nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) ## plt.savefig("results/preimage_mix/mutag.png", format="PNG") # plt.show() # plt.clf() ################################################################### # gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') # km_tmp = gmfile['gm'] # time_km = gmfile['gmtime'] # # modify mixed gram matrix. # km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) # for i in range(len(Gn)): # for j in range(i, len(Gn)): # km[i, j] = km_tmp[i, j] # km[j, i] = km[i, j] # for i in range(len(Gn)): # for j, idx in enumerate(idx_rdm): # km[i, len(Gn) + j] = km[i, idx] # km[len(Gn) + j, i] = km[i, idx] # for i, idx1 in enumerate(idx_rdm): # for j, idx2 in enumerate(idx_rdm): # km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] ################################################################### alpha_range = [1 / nb_median] * nb_median time0 = time.time() G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \ = iam_upgraded(Gn_median, Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam, epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes, params_ged=params_ged) time_total = time.time() - time0 print('\ntime: ', time_total) time_list.append(time_total) # compute distance between \psi and the new generated graphs. knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label, edge_label, False) dhat_new_list = [] for idx, g_tmp in enumerate(G_gen_median_list): # @todo: the term3 below could use the one at the beginning of the function. dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list), len(G_gen_median_list) + len(Gn_median) + 1), alpha_range, knew, withterm3=False)) print('\nsmallest distance in kernel space: ', dhat_new_list[0]) dis_ks_min_list.append(dhat_new_list[0]) g_best.append(G_gen_median_list[0]) # show the best graph and save it to file. # print('the shortest distance is', dhat) print('one of the possible corresponding pre-images is') nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'), with_labels=True) plt.show() # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) + # plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) + # '.png', format="PNG") plt.clf() # print(ghat_list[0].nodes(data=True)) # print(ghat_list[0].edges(data=True)) sod_gs_list.append(sod_gen_median) # sod_gs_min_list.append(np.min(sod_gen_median)) print('\nsmallest sod in graph space: ', sod_gen_median) print('\nsmallest sod of set median in graph space: ', sod_set_median) print('\nsods in graph space: ', sod_gs_list) # print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list) print('\nsmallest distance in kernel space for each set of median graphs: ', dis_ks_min_list) # print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', # nb_updated_list) # print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', # nb_updated_k_list) print('\ntimes:', time_list) ############################################################################### if __name__ == '__main__': ############################################################################### # tests on different numbers of median-sets. # test_iam_median_nb() # test_iam_letter_h() # test_iam_monoterpenoides() # test_iam_mutag() # test_iam_fitdistance() # print("test log") test_iam_monoterpenoides_with_init40()