"""Fit graph edit distance (GED) edit costs to target distances or labels.

The optimizers in this module take a matrix counting, for each pair of
graphs, how many edit operations of each kind were used, and solve for the
per-operation costs that best explain a target vector.
"""
import os
import pickle

import numpy as np

from gklearn.ged.model.distances import sum_squares, euclid_d
from gklearn.ged.model.ged_com import compute_geds

# Columns of the 6-column edit-operation count matrix kept by the
# "unlabeled" optimizers.  Columns 2 and 5 are dropped and the matching
# costs are fixed to 0 in the returned cost vector.
# NOTE(review): columns 2 and 5 are presumably the node/edge substitution
# counts -- confirm against compute_geds.
_UNLABELED_COLUMNS = [0, 1, 3, 4]

# Maximum number of graph pairs used by optimize_costs_unlabeled for one fit.
_MAX_SAMPLE = 1000


def _unlabeled_counts(nb_cost_mat):
    """Return columns 0, 1, 3 and 4 of ``nb_cost_mat`` as an N x 4 array."""
    return np.asarray(nb_cost_mat)[:, _UNLABELED_COLUMNS]


def optimize_costs_unlabeled(nb_cost_mat, dis_k_vec):
    """Fit non-negative edit costs for unlabeled graphs by least squares.

    Only columns 0, 1, 3 and 4 of ``nb_cost_mat`` are used; the costs at
    positions 2 and 5 of the result are always 0.  At most 1000 randomly
    chosen rows (drawn with ``np.random.permutation``) take part in the fit,
    so the result depends on NumPy's global random state when there are
    more rows than that.

    Take care that ``nb_cost_mat`` does not contain all-zero rows.

    :param nb_cost_mat: N x 6 matrix of non-negative integers encoding the
        number of edit operations for each pair of graphs
    :param dis_k_vec: the N distances to fit
    :return: ``(edit_costs_new, residual)`` where ``edit_costs_new`` is a
        list of 6 costs and ``residual`` is the optimal objective value
        reported by the solver
    """
    # Imported lazily so the module can be loaded without cvxpy installed.
    import cvxpy as cp

    counts = _unlabeled_counts(nb_cost_mat)
    targets = np.array(dis_k_vec)

    n_pairs = counts.shape[0]
    sample = np.random.permutation(np.arange(n_pairs))[:_MAX_SAMPLE]

    x = cp.Variable(counts.shape[1])
    cost = cp.sum_squares((counts[sample, :] @ x) - targets[sample])
    prob = cp.Problem(cp.Minimize(cost), [x >= 0])
    prob.solve()

    # Re-insert the two fixed zero costs at positions 2 and 5.
    edit_costs_new = [x.value[0], x.value[1], 0, x.value[2], x.value[3], 0]
    # Clamp anything that is not strictly positive to exactly 0.
    edit_costs_new = [xi if xi > 0 else 0 for xi in edit_costs_new]
    return edit_costs_new, prob.value


def optimize_costs_classif_unlabeled(nb_cost_mat, Y):
    """Fit edit costs for unlabeled graphs with ``ml.reg_log``.

    Only columns 0, 1, 3 and 4 of ``nb_cost_mat`` are passed to
    ``reg_log``; the costs at positions 2 and 5 of the result are always 0.

    Take care that ``nb_cost_mat`` does not contain all-zero rows.

    :param nb_cost_mat: N x 6 matrix of non-negative integers encoding the
        number of edit operations for each pair of graphs
    :param Y: vector in {-1, 1}^N telling whether the two graphs of each
        pair share the same class
    :return: ``(edit_costs_new, residual)`` where ``edit_costs_new`` is a
        list of 6 costs and ``residual`` is the last entry of the second
        value returned by ``reg_log``
    """
    from ml import reg_log

    counts = _unlabeled_counts(nb_cost_mat)
    w, J, _ = reg_log(counts, Y, pos_contraint=True)
    edit_costs_new = [w[0], w[1], 0, w[2], w[3], 0]
    return edit_costs_new, J[-1]


def optimize_costs_classif(nb_cost_mat, Y):
    """Fit edit costs with ``ml.reg_log`` using every column of the counts.

    Take care that ``nb_cost_mat`` does not contain all-zero rows.

    :param nb_cost_mat: N x 6 matrix of non-negative integers encoding the
        number of edit operations for each pair of graphs
    :param Y: vector in {-1, 1}^N telling whether the two graphs of each
        pair share the same class
    :return: ``(w, residual)`` where ``w`` is the weight vector returned by
        ``reg_log`` and ``residual`` is the last entry of its second
        return value
    """
    from ml import reg_log

    w, J, _ = reg_log(nb_cost_mat, Y, pos_contraint=True)
    return w, J[-1]


def optimize_costs(nb_cost_mat, dis_k_vec):
    """Fit edit costs to target distances by constrained least squares.

    The costs ``x`` minimise ``||nb_cost_mat @ x - dis_k_vec||^2`` subject to

    * ``x[i] >= 0.01`` for every cost,
    * ``x[0] + x[1] - x[2] >= 0``,
    * ``x[3] + x[4] - x[5] >= 0``.

    NOTE(review): the last two constraints presumably keep a substitution
    from costing more than a removal plus an insertion -- confirm the
    column order against compute_geds.

    Take care that ``nb_cost_mat`` does not contain all-zero rows.

    :param nb_cost_mat: N x 6 array (must expose ``.shape``) of non-negative
        integers encoding the number of edit operations for each pair of
        graphs
    :param dis_k_vec: the N distances to fit
    :return: ``(edit_costs_new, residual)`` where ``edit_costs_new`` is the
        solver's value for the cost variable and ``residual`` is the optimal
        objective value reported by the solver
    """
    # Imported lazily so the module can be loaded without cvxpy installed.
    import cvxpy as cp

    n_costs = nb_cost_mat.shape[1]
    x = cp.Variable(n_costs)
    cost = cp.sum_squares((nb_cost_mat @ x) - dis_k_vec)
    constraints = [
        x >= np.full(n_costs, 0.01),
        np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]) @ x >= 0.0,
        np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]) @ x >= 0.0,
    ]
    prob = cp.Problem(cp.Minimize(cost), constraints)
    prob.solve()
    return x.value, prob.value


def compute_optimal_costs(G, y, init_costs=None, y_distance=euclid_d,
                          mode='reg', unlabeled=False, ed_method='BIPARTITE',
                          verbose=True, **kwargs):
    """Alternate between computing GEDs and re-fitting the edit costs.

    Every unordered pair ``(i, j)`` with ``i < j`` is formed, its target
    value is ``y_distance(y[i], y[j])``, and for 5 iterations the edit costs
    are re-fitted on the current edit-operation counts, after which the GEDs
    and counts are recomputed with the new costs.

    :param G: collection of graphs, forwarded to ``compute_geds``
    :param y: one target per graph; ``len(y)`` fixes the number of graphs
    :param init_costs: edit costs used for the first GED computation;
        defaults to ``[3, 3, 1, 3, 3, 1]``
    :param y_distance: callable giving the target value for two entries of
        ``y``
    :param mode: ``'reg'`` to fit with the least-squares optimizers or
        ``'classif'`` to fit with the ``reg_log``-based optimizers (which
        receive the pairwise ``y_distance`` values as ``Y``)
    :param unlabeled: if true, use the optimizer variant that fixes the
        costs at positions 2 and 5 to 0
    :param ed_method: forwarded to ``compute_geds``
    :param verbose: print the iteration counter and forward the flag to
        ``compute_geds``
    :param kwargs: extra keyword arguments forwarded to ``compute_geds``
    :return: the edit costs produced by the last fitting iteration
    :raises ValueError: if ``mode`` is neither ``'reg'`` nor ``'classif'``
    """
    if init_costs is None:
        # Built per call so the default list is never shared between calls.
        init_costs = [3, 3, 1, 3, 3, 1]

    optimizers = {
        ('reg', True): optimize_costs_unlabeled,
        ('reg', False): optimize_costs,
        ('classif', True): optimize_costs_classif_unlabeled,
        ('classif', False): optimize_costs_classif,
    }
    # Validate before the first (expensive) GED computation instead of
    # failing on an unbound optimizer afterwards.
    try:
        method_optim = optimizers[(mode, bool(unlabeled))]
    except KeyError:
        raise ValueError(
            "mode must be 'reg' or 'classif', got {!r}".format(mode)
        ) from None

    n_graphs = len(y)
    G_pairs = [[i, j] for i in range(n_graphs) for j in range(i + 1, n_graphs)]
    distances_vec = [y_distance(y[i], y[j]) for i, j in G_pairs]

    ged_vec_init, n_edit_operations = compute_geds(
        G_pairs, G, init_costs, ed_method, verbose=verbose, **kwargs)

    # Residual history is kept for inspection while debugging; it is not
    # returned.
    residual_list = [sum_squares(ged_vec_init, distances_vec)]

    ite_max = 5
    for i in range(ite_max):
        if verbose:
            print('ite', i + 1, '/', ite_max, ':')
        # Fit new costs on the current edit-operation counts ...
        edit_costs_new, _ = method_optim(
            np.array(n_edit_operations), distances_vec)
        # ... then recompute GEDs and edit-operation counts with them.
        ged_vec, n_edit_operations = compute_geds(
            G_pairs, G, edit_costs_new, ed_method,
            verbose=verbose, **kwargs)
        residual_list.append(sum_squares(ged_vec, distances_vec))

    return edit_costs_new


def get_optimal_costs_GH2020(**kwargs):
    """Load previously saved edit costs from a pickle file.

    The file read is ``cj/output/costs.<ds_name>.<nb_trial>.pkl``, relative
    to the current working directory.

    :param kwargs: must contain ``ds_name`` (a string) and should contain
        ``nb_trial`` (converted with ``str``)
    :return: the object stored in the pickle file
    :raises TypeError: if ``ds_name`` is missing or is not a string
    """
    dir_root = 'cj/output/'
    ds_name = kwargs.get('ds_name')
    nb_trial = kwargs.get('nb_trial')
    file_name = os.path.join(
        dir_root, 'costs.' + ds_name + '.' + str(nb_trial) + '.pkl')
    # SECURITY: pickle.load can execute arbitrary code; only use this on
    # cost files produced by a trusted run.
    with open(file_name, 'rb') as f:
        edit_costs = pickle.load(f)
    return edit_costs