diff --git a/lang/fr/gklearn/preimage/utils.py b/lang/fr/gklearn/preimage/utils.py
new file mode 100644
index 0000000..d4d5d05
--- /dev/null
+++ b/lang/fr/gklearn/preimage/utils.py
@@ -0,0 +1,516 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Oct 17 19:05:07 2019
+
+Useful functions.
+@author: ljia
+"""
+#import networkx as nx
+
+import multiprocessing
+import numpy as np
+
+from gklearn.kernels.marginalizedKernel import marginalizedkernel
+from gklearn.kernels.untilHPathKernel import untilhpathkernel
+from gklearn.kernels.spKernel import spkernel
+import functools
+from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct, polynomialkernel
+from gklearn.kernels.structuralspKernel import structuralspkernel
+from gklearn.kernels.treeletKernel import treeletkernel
+from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
+from gklearn.utils import Dataset
+import csv
+import networkx as nx
+import os
+
+
+def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False, cut_range=None):
+    import os.path
+    from gklearn.preimage import MedianPreimageGenerator
+    from gklearn.utils import split_dataset_by_target
+    from gklearn.utils.graphfiles import saveGXL
+
+    # 1. get dataset.
+    print('1. getting dataset...')
+    dataset_all = Dataset()
+    dataset_all.load_predefined_dataset(ds_name)
+    dataset_all.trim_dataset(edge_required=edge_required)
+    if irrelevant_labels is not None:
+        dataset_all.remove_labels(**irrelevant_labels)
+    if cut_range is not None:
+        dataset_all.cut_graphs(cut_range)
+    datasets = split_dataset_by_target(dataset_all)
+
+    if save_results:
+        # create result files.
+        print('creating output files...')
+        fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
+
+    sod_sm_list = []
+    sod_gm_list = []
+    dis_k_sm_list = []
+    dis_k_gm_list = []
+    dis_k_gi_min_list = []
+    time_optimize_ec_list = []
+    time_generate_list = []
+    time_total_list = []
+    itrs_list = []
+    converged_list = []
+    num_updates_ecc_list = []
+    mge_decrease_order_list = []
+    mge_increase_order_list = []
+    mge_converged_order_list = []
+    nb_sod_sm2gm = [0, 0, 0]
+    nb_dis_k_sm2gm = [0, 0, 0]
+    nb_dis_k_gi2sm = [0, 0, 0]
+    nb_dis_k_gi2gm = [0, 0, 0]
+    dis_k_max_list = []
+    dis_k_min_list = []
+    dis_k_mean_list = []
+    if load_gm == 'auto':
+        gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
+        gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
+        if gmfile_exist:
+            gmfile = np.load(gm_fname, allow_pickle=True)  # @todo: may not be safe.
+            gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']]
+            time_precompute_gm_list = gmfile['run_time_list'].tolist()
+        else:
+            gram_matrix_unnorm_list = []
+            time_precompute_gm_list = []
+    elif not load_gm:
+        gram_matrix_unnorm_list = []
+        time_precompute_gm_list = []
+    else:
+        gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
+        gmfile = np.load(gm_fname, allow_pickle=True)  # @todo: may not be safe.
+        gmfile_exist = True  # loading was requested explicitly, so assume the file exists.
+        gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']]
+        time_precompute_gm_list = gmfile['run_time_list'].tolist()
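+    # Layout of the Gram-matrix cache loaded above: it is the .npz file written
+    # near the end of this function, holding one unnormalized Gram matrix per
+    # target class plus the time spent computing each one, e.g. (the dataset and
+    # kernel names here are illustrative only):
+    #     np.savez('gram_matrix_unnorm.MUTAG.untilhpathkernel.gm',
+    #              gram_matrix_unnorm_list=..., run_time_list=...)
+    # so np.load(..., allow_pickle=True) recovers both arrays by those keys.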
+#    repeats_better_sod_sm2gm = []
+#    repeats_better_dis_k_sm2gm = []
+#    repeats_better_dis_k_gi2sm = []
+#    repeats_better_dis_k_gi2gm = []
+
+    print('starting generating preimage for each class of target...')
+    idx_offset = 0
+    for idx, dataset in enumerate(datasets):
+        target = dataset.targets[0]
+        print('\ntarget =', target, '\n')
+#        if target != 1:
+#            continue
+
+        num_graphs = len(dataset.graphs)
+        if num_graphs < 2:
+            print('\nnumber of graphs = ', num_graphs, ', skip.\n')
+            idx_offset += 1
+            continue
+
+        # 2. set parameters.
+        print('2. initializing mpg and setting parameters...')
+        if load_gm and gmfile_exist:
+            mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx - idx_offset]
+            mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx - idx_offset]
+        mpg = MedianPreimageGenerator()
+        mpg.dataset = dataset
+        mpg.set_options(**mpg_options.copy())
+        mpg.kernel_options = kernel_options.copy()
+        mpg.ged_options = ged_options.copy()
+        mpg.mge_options = mge_options.copy()
+
+        # 3. compute median preimage.
+        print('3. computing median preimage...')
+        mpg.run()
+        results = mpg.get_results()
+
+        # 4. compute pairwise kernel distances.
+        print('4. computing pairwise kernel distances...')
+        _, dis_k_max, dis_k_min, dis_k_mean = mpg.graph_kernel.compute_distance_matrix()
+        dis_k_max_list.append(dis_k_max)
+        dis_k_min_list.append(dis_k_min)
+        dis_k_mean_list.append(dis_k_mean)
+
+        # 5. save results (and median graphs).
+        print('5. saving results (and median graphs)...')
+        # write result detail.
+        if save_results:
+            print('writing results to files...')
+            sod_sm2gm = get_relations(np.sign(results['sod_gen_median'] - results['sod_set_median']))
+            dis_k_sm2gm = get_relations(np.sign(results['k_dis_gen_median'] - results['k_dis_set_median']))
+            dis_k_gi2sm = get_relations(np.sign(results['k_dis_set_median'] - results['k_dis_dataset']))
+            dis_k_gi2gm = get_relations(np.sign(results['k_dis_gen_median'] - results['k_dis_dataset']))
+
+            f_detail = open(dir_save + fn_output_detail, 'a')
+            csv.writer(f_detail).writerow([ds_name, kernel_options['name'],
+                ged_options['edit_cost'], ged_options['method'],
+                ged_options['attr_distance'], mpg_options['fit_method'],
+                num_graphs, target, 1,
+                results['sod_set_median'], results['sod_gen_median'],
+                results['k_dis_set_median'], results['k_dis_gen_median'],
+                results['k_dis_dataset'], sod_sm2gm, dis_k_sm2gm,
+                dis_k_gi2sm, dis_k_gi2gm, results['edit_cost_constants'],
+                results['runtime_precompute_gm'], results['runtime_optimize_ec'],
+                results['runtime_generate_preimage'], results['runtime_total'],
+                results['itrs'], results['converged'],
+                results['num_updates_ecc'],
+                results['mge']['num_decrease_order'] > 0,  # @todo: not suitable for multi-start mge
+                results['mge']['num_increase_order'] > 0,
+                results['mge']['num_converged_descents'] > 0])
+            f_detail.close()
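+            # The nb_* lists filled in below are [better, equal, worse] tallies
+            # across classes; e.g. nb_sod_sm2gm[0] counts the classes whose
+            # generalized median reached a strictly smaller SOD than the set
+            # median. The running tallies are appended to every per-class
+            # summary row, so the last row holds the final counts.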
+            # compute result summary.
+            sod_sm_list.append(results['sod_set_median'])
+            sod_gm_list.append(results['sod_gen_median'])
+            dis_k_sm_list.append(results['k_dis_set_median'])
+            dis_k_gm_list.append(results['k_dis_gen_median'])
+            dis_k_gi_min_list.append(results['k_dis_dataset'])
+            time_precompute_gm_list.append(results['runtime_precompute_gm'])
+            time_optimize_ec_list.append(results['runtime_optimize_ec'])
+            time_generate_list.append(results['runtime_generate_preimage'])
+            time_total_list.append(results['runtime_total'])
+            itrs_list.append(results['itrs'])
+            converged_list.append(results['converged'])
+            num_updates_ecc_list.append(results['num_updates_ecc'])
+            mge_decrease_order_list.append(results['mge']['num_decrease_order'] > 0)
+            mge_increase_order_list.append(results['mge']['num_increase_order'] > 0)
+            mge_converged_order_list.append(results['mge']['num_converged_descents'] > 0)
+            # # SOD SM -> GM
+            if results['sod_set_median'] > results['sod_gen_median']:
+                nb_sod_sm2gm[0] += 1
+#                repeats_better_sod_sm2gm.append(1)
+            elif results['sod_set_median'] == results['sod_gen_median']:
+                nb_sod_sm2gm[1] += 1
+            elif results['sod_set_median'] < results['sod_gen_median']:
+                nb_sod_sm2gm[2] += 1
+            # # dis_k SM -> GM
+            if results['k_dis_set_median'] > results['k_dis_gen_median']:
+                nb_dis_k_sm2gm[0] += 1
+#                repeats_better_dis_k_sm2gm.append(1)
+            elif results['k_dis_set_median'] == results['k_dis_gen_median']:
+                nb_dis_k_sm2gm[1] += 1
+            elif results['k_dis_set_median'] < results['k_dis_gen_median']:
+                nb_dis_k_sm2gm[2] += 1
+            # # dis_k gi -> SM
+            if results['k_dis_dataset'] > results['k_dis_set_median']:
+                nb_dis_k_gi2sm[0] += 1
+#                repeats_better_dis_k_gi2sm.append(1)
+            elif results['k_dis_dataset'] == results['k_dis_set_median']:
+                nb_dis_k_gi2sm[1] += 1
+            elif results['k_dis_dataset'] < results['k_dis_set_median']:
+                nb_dis_k_gi2sm[2] += 1
+            # # dis_k gi -> GM
+            if results['k_dis_dataset'] > results['k_dis_gen_median']:
+                nb_dis_k_gi2gm[0] += 1
+#                repeats_better_dis_k_gi2gm.append(1)
+            elif results['k_dis_dataset'] == results['k_dis_gen_median']:
+                nb_dis_k_gi2gm[1] += 1
+            elif results['k_dis_dataset'] < results['k_dis_gen_median']:
+                nb_dis_k_gi2gm[2] += 1
+
+            # write result summary for each class.
+            f_summary = open(dir_save + fn_output_summary, 'a')
+            csv.writer(f_summary).writerow([ds_name, kernel_options['name'],
+                ged_options['edit_cost'], ged_options['method'],
+                ged_options['attr_distance'], mpg_options['fit_method'],
+                num_graphs, target,
+                results['sod_set_median'], results['sod_gen_median'],
+                results['k_dis_set_median'], results['k_dis_gen_median'],
+                results['k_dis_dataset'], sod_sm2gm, dis_k_sm2gm,
+                dis_k_gi2sm, dis_k_gi2gm,
+                results['runtime_precompute_gm'], results['runtime_optimize_ec'],
+                results['runtime_generate_preimage'], results['runtime_total'],
+                results['itrs'], results['converged'],
+                results['num_updates_ecc'],
+                results['mge']['num_decrease_order'] > 0,  # @todo: not suitable for multi-start mge
+                results['mge']['num_increase_order'] > 0,
+                results['mge']['num_converged_descents'] > 0,
+                nb_sod_sm2gm,
+                nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm])
+            f_summary.close()
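+        # The median graphs saved below get file names encoding the fit method,
+        # the class size and the target, e.g. (illustrative values only):
+        #     medians/set_median.expert.nbg10.y1.repeat1.gxl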
+        # save median graphs.
+        if save_medians:
+            if not os.path.exists(dir_save + 'medians/'):
+                os.makedirs(dir_save + 'medians/')
+            print('Saving median graphs to files...')
+            fn_pre_sm = dir_save + 'medians/set_median.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
+            saveGXL(mpg.set_median, fn_pre_sm + '.gxl', method='default',
+                    node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
+                    node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
+            fn_pre_gm = dir_save + 'medians/gen_median.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
+            saveGXL(mpg.gen_median, fn_pre_gm + '.gxl', method='default',
+                    node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
+                    node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
+            fn_best_dataset = dir_save + 'medians/g_best_dataset.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
+            saveGXL(mpg.best_from_dataset, fn_best_dataset + '.gxl', method='default',
+                    node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
+                    node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
+
+        # plot median graphs.
+        if plot_medians and save_medians:
+            if ged_options['edit_cost'] == 'LETTER2' or ged_options['edit_cost'] == 'LETTER' or ds_name == 'Letter-high' or ds_name == 'Letter-med' or ds_name == 'Letter-low':
+                draw_Letter_graph(mpg.set_median, fn_pre_sm)
+                draw_Letter_graph(mpg.gen_median, fn_pre_gm)
+                draw_Letter_graph(mpg.best_from_dataset, fn_best_dataset)
+
+        if (load_gm == 'auto' and not gmfile_exist) or not load_gm:
+            gram_matrix_unnorm_list.append(mpg.gram_matrix_unnorm)
+
+    # write result summary over all classes.
+    if save_results:
+        sod_sm_mean = np.mean(sod_sm_list)
+        sod_gm_mean = np.mean(sod_gm_list)
+        dis_k_sm_mean = np.mean(dis_k_sm_list)
+        dis_k_gm_mean = np.mean(dis_k_gm_list)
+        dis_k_gi_min_mean = np.mean(dis_k_gi_min_list)
+        time_precompute_gm_mean = np.mean(time_precompute_gm_list)
+        time_optimize_ec_mean = np.mean(time_optimize_ec_list)
+        time_generate_mean = np.mean(time_generate_list)
+        time_total_mean = np.mean(time_total_list)
+        itrs_mean = np.mean(itrs_list)
+        num_converged = np.sum(converged_list)
+        num_updates_ecc_mean = np.mean(num_updates_ecc_list)
+        num_mge_decrease_order = np.sum(mge_decrease_order_list)
+        num_mge_increase_order = np.sum(mge_increase_order_list)
+        num_mge_converged = np.sum(mge_converged_order_list)
+        sod_sm2gm_mean = get_relations(np.sign(sod_gm_mean - sod_sm_mean))
+        dis_k_sm2gm_mean = get_relations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
+        dis_k_gi2sm_mean = get_relations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
+        dis_k_gi2gm_mean = get_relations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
+        f_summary = open(dir_save + fn_output_summary, 'a')
+        csv.writer(f_summary).writerow([ds_name, kernel_options['name'],
+            ged_options['edit_cost'], ged_options['method'],
+            ged_options['attr_distance'], mpg_options['fit_method'],
+            num_graphs, 'all',
+            sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
+            dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
+            dis_k_gi2sm_mean, dis_k_gi2gm_mean,
+            time_precompute_gm_mean, time_optimize_ec_mean,
+            time_generate_mean, time_total_mean, itrs_mean,
+            num_converged, num_updates_ecc_mean,
+            num_mge_decrease_order, num_mge_increase_order,
+            num_mge_converged])
+        f_summary.close()
+
+    # print total pairwise kernel distances.
+    dis_k_max = np.max(dis_k_max_list)
+    dis_k_min = np.min(dis_k_min_list)
+    dis_k_mean = np.mean(dis_k_mean_list)
+    print('The maximum pairwise distance in kernel space:', dis_k_max)
+    print('The minimum pairwise distance in kernel space:', dis_k_min)
+    print('The average pairwise distance in kernel space:', dis_k_mean)
+
+    # write Gram matrices to file.
+    if (load_gm == 'auto' and not gmfile_exist) or not load_gm:
+        np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list)
+
+    print('\ncomplete.\n')
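+
+# A minimal usage sketch for generate_median_preimages_by_class. Only the
+# option keys actually read inside this function are shown; the values are
+# purely illustrative (see MedianPreimageGenerator for the full option sets):
+#     mpg_options = {'fit_method': 'k-graphs'}
+#     kernel_options = {'name': 'untilhpathkernel'}
+#     ged_options = {'edit_cost': 'LETTER2', 'method': 'IPFP',
+#                    'attr_distance': 'euclidean'}
+#     mge_options = {}
+#     generate_median_preimages_by_class('Letter-high', mpg_options,
+#         kernel_options, ged_options, mge_options, dir_save='outputs/')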
+
+
+def __init_output_file_preimage(ds_name, gkernel, fit_method, dir_output):
+    if not os.path.exists(dir_output):
+        os.makedirs(dir_output)
+#    fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
+    fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
+    f_detail = open(dir_output + fn_output_detail, 'a')
+    csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'edit cost',
+        'GED method', 'attr distance', 'fit method', 'num graphs',
+        'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
+        'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
+        'dis_k gi -> GM', 'edit cost constants', 'time precompute gm',
+        'time optimize ec', 'time generate preimage', 'time total',
+        'itrs', 'converged', 'num updates ecc', 'mge decrease order',
+        'mge increase order', 'mge converged'])
+    f_detail.close()
+
+#    fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
+    fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv'
+    f_summary = open(dir_output + fn_output_summary, 'a')
+    csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'edit cost',
+        'GED method', 'attr distance', 'fit method', 'num graphs',
+        'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
+        'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
+        'dis_k gi -> GM', 'time precompute gm', 'time optimize ec',
+        'time generate preimage', 'time total', 'itrs', 'num converged',
+        'num updates ecc', 'mge num decrease order', 'mge num increase order',
+        'mge num converged', '# SOD SM -> GM', '# dis_k SM -> GM',
+        '# dis_k gi -> SM', '# dis_k gi -> GM'])
+#        'repeats better SOD SM -> GM',
+#        'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
+#        'repeats better dis_k gi -> GM'])
+    f_summary.close()
+
+    return fn_output_detail, fn_output_summary
+
+
+def get_relations(sign):
+    if sign == -1:
+        return 'better'
+    elif sign == 0:
+        return 'same'
+    elif sign == 1:
+        return 'worse'
+
+
+# Draw the current median graph (Letter-type graphs with 'x'/'y' coordinates).
+def draw_Letter_graph(graph, file_prefix):
+    import matplotlib
+    matplotlib.use('agg')
+    import matplotlib.pyplot as plt
+    plt.figure()
+    pos = {}
+    for n in graph.nodes:
+        pos[n] = np.array([float(graph.nodes[n]['x']), float(graph.nodes[n]['y'])])
+    nx.draw_networkx(graph, pos)
+    plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
+#    plt.show()
+    plt.clf()
+    plt.close()
+
+
+def remove_edges(Gn):
+    # Strip all edge attributes from each graph; the edges themselves are kept.
+    for G in Gn:
+        for _, _, attrs in G.edges(data=True):
+            attrs.clear()
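+
+
+# dis_gstar and compute_k_dis below both evaluate the kernel-space distance
+# between a graph g (index idx_g) and the weighted mean point
+# g* = sum_i alpha_i * phi(g_i), using the standard expansion
+#     d(g, g*) = sqrt( k(g, g) - 2 * sum_i alpha_i * k(g, g_i)
+#                      + sum_{i,j} alpha_i * alpha_j * k(g_i, g_j) ).
+# With withterm3=True the (often reusable) double sum must be supplied as
+# term3; with withterm3=False it is recomputed inside the function.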
+
+
+def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
+    term1 = Kmatrix[idx_g, idx_g]
+    term2 = 0
+    for i, a in enumerate(alpha):
+        term2 += a * Kmatrix[idx_g, idx_gi[i]]
+    term2 *= 2
+    if not withterm3:
+        for i1, a1 in enumerate(alpha):
+            for i2, a2 in enumerate(alpha):
+                term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
+    return np.sqrt(term1 - term2 + term3)
+
+
+def compute_k_dis(idx_g, idx_gi, alphas, Kmatrix, term3=0, withterm3=True):
+    term1 = Kmatrix[idx_g, idx_g]
+    term2 = 0
+    for i, a in enumerate(alphas):
+        term2 += a * Kmatrix[idx_g, idx_gi[i]]
+    term2 *= 2
+    if not withterm3:
+        for i1, a1 in enumerate(alphas):
+            for i2, a2 in enumerate(alphas):
+                term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
+    return np.sqrt(term1 - term2 + term3)
+
+
+def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel='imap_unordered'):
+    if graph_kernel == 'marginalizedkernel':
+        Kmatrix, _ = marginalizedkernel(Gn, node_label=node_label, edge_label=edge_label,
+                                        p_quit=0.03, n_iteration=10, remove_totters=False,
+                                        n_jobs=multiprocessing.cpu_count(), verbose=verbose)
+    elif graph_kernel == 'untilhpathkernel':
+        Kmatrix, _ = untilhpathkernel(Gn, node_label=node_label, edge_label=edge_label,
+                                      depth=7, k_func='MinMax', compute_method='trie',
+                                      parallel=parallel,
+                                      n_jobs=multiprocessing.cpu_count(), verbose=verbose)
+    elif graph_kernel == 'spkernel':
+        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+        Kmatrix = np.empty((len(Gn), len(Gn)))
+#        Kmatrix[:] = np.nan
+        Kmatrix, _, idx = spkernel(Gn, node_label=node_label, node_kernels=
+                                   {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
+                                   n_jobs=multiprocessing.cpu_count(), verbose=verbose)
+#        for i, row in enumerate(idx):
+#            for j, col in enumerate(idx):
+#                Kmatrix[row, col] = Kmatrix_tmp[i, j]
+    elif graph_kernel == 'structuralspkernel':
+        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+        sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+        Kmatrix, _ = structuralspkernel(Gn, node_label=node_label,
+                                        edge_label=edge_label, node_kernels=sub_kernels,
+                                        edge_kernels=sub_kernels,
+                                        parallel=parallel, n_jobs=multiprocessing.cpu_count(),
+                                        verbose=verbose)
+    elif graph_kernel == 'treeletkernel':
+        pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
+#        pkernel = functools.partial(gaussiankernel, gamma=1e-6)
+        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+        Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label,
+                                   sub_kernel=pkernel, parallel=parallel,
+                                   n_jobs=multiprocessing.cpu_count(), verbose=verbose)
+    elif graph_kernel == 'weisfeilerlehmankernel':
+        Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label,
+                                            height=4, base_kernel='subtree', parallel=None,
+                                            n_jobs=multiprocessing.cpu_count(), verbose=verbose)
+    else:
+        raise Exception('The graph kernel "%s" is not defined.' % graph_kernel)
+
+    # normalization: divide by the diagonal so that Kmatrix[i][i] == 1.
+    Kmatrix_diag = Kmatrix.diagonal().copy()
+    for i in range(len(Kmatrix)):
+        for j in range(i, len(Kmatrix)):
+            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
+            Kmatrix[j][i] = Kmatrix[i][j]
+    return Kmatrix
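+
+
+# A small sketch of calling compute_kernel on toy graphs; the 'atom' and
+# 'bond_type' label names are illustrative and must match the graphs at hand:
+#     g1 = nx.Graph(); g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
+#     g1.add_edge(0, 1, bond_type='1')
+#     g2 = nx.Graph(); g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'})])
+#     g2.add_edge(0, 1, bond_type='2')
+#     K = compute_kernel([g1, g2], 'untilhpathkernel', 'atom', 'bond_type', False)
+# The returned Gram matrix is cosine-normalized, so its diagonal is all ones.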
+
+
+def gram2distances(Kmatrix):
+    dmatrix = np.zeros((len(Kmatrix), len(Kmatrix)))
+    for i1 in range(len(Kmatrix)):
+        for i2 in range(len(Kmatrix)):
+            dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2]
+    dmatrix = np.sqrt(dmatrix)
+    return dmatrix
+
+
+def kernel_distance_matrix(Gn, node_label, edge_label, Kmatrix=None,
+                           gkernel=None, verbose=True):
+    import warnings
+    warnings.warn('gklearn.preimage.utils.kernel_distance_matrix is deprecated, use gklearn.kernels.graph_kernel.compute_distance_matrix or gklearn.utils.compute_distance_matrix instead', DeprecationWarning)
+    dis_mat = np.empty((len(Gn), len(Gn)))
+    if Kmatrix is None:
+        Kmatrix = compute_kernel(Gn, gkernel, node_label, edge_label, verbose)
+    for i in range(len(Gn)):
+        for j in range(i, len(Gn)):
+            dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
+            if dis < 0:
+                if dis > -1e-10:
+                    dis = 0  # clamp tiny negatives caused by rounding.
+                else:
+                    raise ValueError('The distance is negative.')
+            dis_mat[i, j] = np.sqrt(dis)
+            dis_mat[j, i] = dis_mat[i, j]
+    dis_max = np.max(dis_mat)
+    dis_min = np.min(dis_mat[dis_mat != 0])
+    dis_mean = np.mean(dis_mat)
+    return dis_mat, dis_max, dis_min, dis_mean
+
+
+def get_same_item_indices(ls):
+    """Get the indices of the same items in a list. Return a dict keyed by items.
+    """
+    idx_dict = {}
+    for idx, item in enumerate(ls):
+        if item in idx_dict:
+            idx_dict[item].append(idx)
+        else:
+            idx_dict[item] = [idx]
+    return idx_dict
+
+
+def k_nearest_neighbors_to_median_in_kernel_space(Gn, Kmatrix=None, gkernel=None,
+                                                  node_label=None, edge_label=None):
+    import sys
+    from tqdm import tqdm
+    dis_k_all = []  # distance between g_star and each graph.
+    alpha = [1 / len(Gn)] * len(Gn)
+    if Kmatrix is None:
+        Kmatrix = compute_kernel(Gn, gkernel, node_label, edge_label, True)
+    idx_gi = list(range(len(Gn)))  # the median point is the uniform mean of all graphs.
+    term3 = 0
+    for i1, a1 in enumerate(alpha):
+        for i2, a2 in enumerate(alpha):
+            term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
+    for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
+        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
+        dis_k_all.append(dtemp)
+    # return every distance; callers can pick the k smallest themselves.
+    return dis_k_all
+
+
+def normalize_distance_matrix(D):
+    max_value = np.amax(D)
+    min_value = np.amin(D)
+    return (D - min_value) / (max_value - min_value)
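+
+
+# Worked examples (values can be checked by hand):
+#     get_same_item_indices(['a', 'b', 'a'])  # -> {'a': [0, 2], 'b': [1]}
+#     normalize_distance_matrix(np.array([[0., 2.], [2., 4.]]))
+#     # -> array([[0. , 0.5], [0.5, 1. ]])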