diff --git a/gklearn/preimage/common_types.py b/gklearn/preimage/common_types.py
deleted file mode 100644
index 2face25..0000000
--- a/gklearn/preimage/common_types.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Mar 19 18:17:38 2020
-
-@author: ljia
-"""
-
-from enum import Enum, auto
-
-class AlgorithmState(Enum):
-    """Can be used to specify the state of an algorithm.
-    """
-    CALLED = auto() # The algorithm has been called.
-    INITIALIZED = auto() # The algorithm has been initialized.
-    CONVERGED = auto() # The algorithm has converged.
-    TERMINATED = auto() # The algorithm has terminated.
\ No newline at end of file
diff --git a/gklearn/preimage/cpp2python.py b/gklearn/preimage/cpp2python.py
deleted file mode 100644
index 9d63026..0000000
--- a/gklearn/preimage/cpp2python.py
+++ /dev/null
@@ -1,134 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Mar 20 11:09:04 2020
-
-@author: ljia
-"""
-import re
-
-def convert_function(cpp_code):
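-    """Roughly translate a C++ code snippet to Python through textual
-    substitutions. The conversion is heuristic and line-oriented; the result
-    usually needs manual fix-up (block structure and std:: calls are not
-    handled).
-    """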
-# f_cpp = open('cpp_code.cpp', 'r')
-# # f_cpp = open('cpp_ext/src/median_graph_estimator.ipp', 'r')
-# cpp_code = f_cpp.read()
- python_code = cpp_code.replace('else if (', 'elif ')
- python_code = python_code.replace('if (', 'if ')
- python_code = python_code.replace('else {', 'else:')
- python_code = python_code.replace(') {', ':')
- python_code = python_code.replace(';\n', '\n')
- python_code = re.sub('\n(.*)}\n', '\n\n', python_code)
- # python_code = python_code.replace('}\n', '')
- python_code = python_code.replace('throw', 'raise')
- python_code = python_code.replace('error', 'Exception')
- python_code = python_code.replace('"', '\'')
- python_code = python_code.replace('\\\'', '"')
- python_code = python_code.replace('try {', 'try:')
- python_code = python_code.replace('true', 'True')
- python_code = python_code.replace('false', 'False')
- python_code = python_code.replace('catch (...', 'except')
- # python_code = re.sub('std::string\(\'(.*)\'\)', '$1', python_code)
-
- return python_code
-
-
-
-# # python_code = python_code.replace('}\n', '')
-
-
-
-
-# python_code = python_code.replace('option.first', 'opt_name')
-# python_code = python_code.replace('option.second', 'opt_val')
-# python_code = python_code.replace('ged::Error', 'Exception')
-# python_code = python_code.replace('std::string(\'Invalid argument "\')', '\'Invalid argument "\'')
-
-
-# f_cpp.close()
-# f_python = open('python_code.py', 'w')
-# f_python.write(python_code)
-# f_python.close()
-
-
-def convert_function_comment(cpp_fun_cmt, param_types):
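-    """Convert a Doxygen-style C++ comment (@brief/@param/@note) into a
-    numpydoc-style Python docstring; param_types gives the type string of
-    each parameter, in order.
-    """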
- cpp_fun_cmt = cpp_fun_cmt.replace('\t', '')
- cpp_fun_cmt = cpp_fun_cmt.replace('\n * ', ' ')
- # split the input comment according to key words.
- param_split = None
- note = None
- cmt_split = cpp_fun_cmt.split('@brief')[1]
- brief = cmt_split
- if '@param' in cmt_split:
- cmt_split = cmt_split.split('@param')
- brief = cmt_split[0]
- param_split = cmt_split[1:]
- if '@note' in cmt_split[-1]:
- note_split = cmt_split[-1].split('@note')
- if param_split is not None:
- param_split.pop()
- param_split.append(note_split[0])
- else:
- brief = note_split[0]
- note = note_split[1]
-
- # get parameters.
- if param_split is not None:
- for idx, param in enumerate(param_split):
- _, param_name, param_desc = param.split(' ', 2)
- param_name = function_comment_strip(param_name, ' *\n\t/')
- param_desc = function_comment_strip(param_desc, ' *\n\t/')
- param_split[idx] = (param_name, param_desc)
-
- # strip comments.
- brief = function_comment_strip(brief, ' *\n\t/')
- if note is not None:
- note = function_comment_strip(note, ' *\n\t/')
-
- # construct the Python function comment.
- python_fun_cmt = '"""'
- python_fun_cmt += brief + '\n'
- if param_split is not None and len(param_split) > 0:
- python_fun_cmt += '\nParameters\n----------'
- for idx, param in enumerate(param_split):
- python_fun_cmt += '\n' + param[0] + ' : ' + param_types[idx]
- python_fun_cmt += '\n\t' + param[1] + '\n'
- if note is not None:
- python_fun_cmt += '\nNote\n----\n' + note + '\n'
- python_fun_cmt += '"""'
-
- return python_fun_cmt
-
-
-def function_comment_strip(comment, bad_chars):
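-    """Strip any leading and trailing characters of comment that appear in
-    bad_chars.
-    """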
- head_removed, tail_removed = False, False
- while not head_removed or not tail_removed:
- if comment[0] in bad_chars:
- comment = comment[1:]
- head_removed = False
- else:
- head_removed = True
- if comment[-1] in bad_chars:
- comment = comment[:-1]
- tail_removed = False
- else:
- tail_removed = True
-
- return comment
-
-
-if __name__ == '__main__':
-# python_code = convert_function("""
-# if (print_to_stdout_ == 2) {
-# std::cout << "\n===========================================================\n";
-# std::cout << "Block gradient descent for initial median " << median_pos + 1 << " of " << medians.size() << ".\n";
-# std::cout << "-----------------------------------------------------------\n";
-# }
-# """)
-
-
- python_fun_cmt = convert_function_comment("""
- /*!
- * @brief Returns the sum of distances.
- * @param[in] state The state of the estimator.
- * @return The sum of distances of the median when the estimator was in the state @p state during the last call to run().
- */
- """, ['string', 'string'])
\ No newline at end of file
diff --git a/gklearn/preimage/find_best_k.py b/gklearn/preimage/find_best_k.py
deleted file mode 100644
index df38d32..0000000
--- a/gklearn/preimage/find_best_k.py
+++ /dev/null
@@ -1,170 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Jan 9 11:54:32 2020
-
-@author: ljia
-"""
-import numpy as np
-import random
-import csv
-
-from gklearn.utils.graphfiles import loadDataset
-from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs
-
-def find_best_k():
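-    """For each k in 2..10, repeatedly draw k random graphs from the dataset,
-    compute their set median (SM) and generalized median (GM) with pre-fitted
-    edit costs, and record the SODs and kernel distances in the detail and
-    summary CSV files.
-    """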
- ds = {'name': 'monoterpenoides',
- 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'])
-# Gn = Gn[0:50]
- gkernel = 'treeletkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
- ds_name = 'mono'
- dir_output = 'results/test_find_best_k/'
-
- repeats = 50
- k_list = range(2, 11)
- fit_method = 'k-graphs'
- # fitted on the whole dataset - treelet - mono
- edit_costs = [0.1268873773592978, 0.004084633224249829, 0.0897581955378986, 0.15328856114451297, 0.3109956881625734, 0.0]
-
- # create result files.
- fn_output_detail = 'results_detail.' + fit_method + '.csv'
- f_detail = open(dir_output + fn_output_detail, 'a')
- csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k',
- 'repeat', 'median set', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
- 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
- 'dis_k gi -> GM'])
- f_detail.close()
- fn_output_summary = 'results_summary.csv'
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k',
- 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
- 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
- 'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
- '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
- 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
- 'repeats better dis_k gi -> GM'])
- f_summary.close()
-
- random.seed(1)
- rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
-
- for k in k_list:
- print('\n--------- k =', k, '----------')
-
- sod_sm_list = []
- sod_gm_list = []
- dis_k_sm_list = []
- dis_k_gm_list = []
- dis_k_gi_min_list = []
- nb_sod_sm2gm = [0, 0, 0]
- nb_dis_k_sm2gm = [0, 0, 0]
- nb_dis_k_gi2sm = [0, 0, 0]
- nb_dis_k_gi2gm = [0, 0, 0]
- repeats_better_sod_sm2gm = []
- repeats_better_dis_k_sm2gm = []
- repeats_better_dis_k_gi2sm = []
- repeats_better_dis_k_gi2gm = []
-
-
- for repeat in range(repeats):
- print('\nrepeat =', repeat)
- random.seed(rdn_seed_list[repeat])
- median_set_idx = random.sample(range(0, len(Gn)), k)
- print('median set: ', median_set_idx)
-
- sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \
- = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k,
- fit_method='k-graphs',
- edit_costs=edit_costs,
- group_min=median_set_idx,
- parallel=False)
-
- # write result detail.
- sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
- dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
- dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
- dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
- f_detail = open(dir_output + fn_output_detail, 'a')
- csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k, repeat,
- median_set_idx, sod_sm, sod_gm, dis_k_sm, dis_k_gm,
- dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
- dis_k_gi2gm])
- f_detail.close()
-
- # compute result summary.
- sod_sm_list.append(sod_sm)
- sod_gm_list.append(sod_gm)
- dis_k_sm_list.append(dis_k_sm)
- dis_k_gm_list.append(dis_k_gm)
- dis_k_gi_min_list.append(dis_k_gi_min)
- # # SOD SM -> GM
- if sod_sm > sod_gm:
- nb_sod_sm2gm[0] += 1
- repeats_better_sod_sm2gm.append(repeat)
- elif sod_sm == sod_gm:
- nb_sod_sm2gm[1] += 1
- elif sod_sm < sod_gm:
- nb_sod_sm2gm[2] += 1
- # # dis_k SM -> GM
- if dis_k_sm > dis_k_gm:
- nb_dis_k_sm2gm[0] += 1
- repeats_better_dis_k_sm2gm.append(repeat)
- elif dis_k_sm == dis_k_gm:
- nb_dis_k_sm2gm[1] += 1
- elif dis_k_sm < dis_k_gm:
- nb_dis_k_sm2gm[2] += 1
- # # dis_k gi -> SM
- if dis_k_gi_min > dis_k_sm:
- nb_dis_k_gi2sm[0] += 1
- repeats_better_dis_k_gi2sm.append(repeat)
- elif dis_k_gi_min == dis_k_sm:
- nb_dis_k_gi2sm[1] += 1
- elif dis_k_gi_min < dis_k_sm:
- nb_dis_k_gi2sm[2] += 1
- # # dis_k gi -> GM
- if dis_k_gi_min > dis_k_gm:
- nb_dis_k_gi2gm[0] += 1
- repeats_better_dis_k_gi2gm.append(repeat)
- elif dis_k_gi_min == dis_k_gm:
- nb_dis_k_gi2gm[1] += 1
- elif dis_k_gi_min < dis_k_gm:
- nb_dis_k_gi2gm[2] += 1
-
- # write result summary.
- sod_sm_mean = np.mean(sod_sm_list)
- sod_gm_mean = np.mean(sod_gm_list)
- dis_k_sm_mean = np.mean(dis_k_sm_list)
- dis_k_gm_mean = np.mean(dis_k_gm_list)
- dis_k_gi_min_mean = np.mean(dis_k_gi_min_list)
- sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
- dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
- dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
- dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k,
- sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
- dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
- dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
- nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
- repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
- repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
- f_summary.close()
-
- print('\ncomplete.')
- return
-
-
-def getRelations(sign):
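-    """Translate the sign of a difference into 'better', 'same' or 'worse'.
-    """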
- if sign == -1:
- return 'better'
- elif sign == 0:
- return 'same'
- elif sign == 1:
- return 'worse'
-
-
-if __name__ == '__main__':
- find_best_k()
\ No newline at end of file
diff --git a/gklearn/preimage/fitDistance.py b/gklearn/preimage/fitDistance.py
deleted file mode 100644
index 234f7fc..0000000
--- a/gklearn/preimage/fitDistance.py
+++ /dev/null
@@ -1,430 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Oct 16 14:20:06 2019
-
-@author: ljia
-"""
-import numpy as np
-from tqdm import tqdm
-from itertools import combinations_with_replacement, combinations
-import multiprocessing
-from multiprocessing import Pool
-from functools import partial
-import time
-import random
-import sys
-
-from scipy import optimize
-from scipy.optimize import minimize
-import cvxpy as cp
-
-from gklearn.preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic
-from gklearn.preimage.utils import kernel_distance_matrix
-
-def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max,
- params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT',
- 'method': 'IPFP', 'stabilizer': None},
- init_costs=[3, 3, 1, 3, 3, 1],
- dataset='monoterpenoides', Kmatrix=None,
- parallel=True):
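-    """Alternately (1) compute pairwise GEDs with the current edit costs and
-    (2) refit the costs by constrained least squares so that the GEDs
-    approximate the kernel-induced distances; run itr_max iterations and
-    return the fitted costs together with per-iteration diagnostics.
-    """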
-# dataset = dataset.lower()
-
- # c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them.
-# random.seed(1)
-# cost_rdm = random.sample(range(1, 10), 6)
-# init_costs = cost_rdm + [0]
-# init_costs = cost_rdm
-# init_costs = [3, 3, 1, 3, 3, 1]
-# init_costs = [i * 0.01 for i in cost_rdm] + [0]
-# init_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0]
-# init_costs = [0, 0, 0.9544, 0.026, 0.0196, 0]
-# init_costs = [0.008429912251810438, 0.025461055985319694, 0.2047320869225948, 0.004148727085832133, 0.0, 0]
-# idx_cost_nonzeros = [i for i, item in enumerate(edit_costs) if item != 0]
-
- # compute distances in feature space.
- dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label,
- Kmatrix=Kmatrix, gkernel=gkernel)
- dis_k_vec = []
- for i in range(len(dis_k_mat)):
-# for j in range(i, len(dis_k_mat)):
- for j in range(i + 1, len(dis_k_mat)):
- dis_k_vec.append(dis_k_mat[i, j])
- dis_k_vec = np.array(dis_k_vec)
-
- # init ged.
- print('\ninitial:')
- time0 = time.time()
- params_ged['dataset'] = dataset
- params_ged['edit_cost_constant'] = init_costs
- ged_vec_init, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
- parallel=parallel)
- residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
- time_list = [time.time() - time0]
- edit_cost_list = [init_costs]
- nb_cost_mat = np.array(n_edit_operations)
- nb_cost_mat_list = [nb_cost_mat]
- print('edit_costs:', init_costs)
- print('residual_list:', residual_list)
-
- for itr in range(itr_max):
- print('\niteration', itr)
- time0 = time.time()
- # "fit" geds to distances in feature space by tuning edit costs using the
- # Least Squares Method.
- np.savez('results/xp_fit_method/fit_data_debug' + str(itr) + '.gm',
- nb_cost_mat=nb_cost_mat, dis_k_vec=dis_k_vec,
- n_edit_operations=n_edit_operations, ged_vec_init=ged_vec_init,
- ged_mat=ged_mat)
- edit_costs_new, residual = update_costs(nb_cost_mat, dis_k_vec,
- dataset=dataset, cost=params_ged['cost'])
- for i in range(len(edit_costs_new)):
- if -1e-9 <= edit_costs_new[i] <= 1e-9:
- edit_costs_new[i] = 0
- if edit_costs_new[i] < 0:
- raise ValueError('The edit cost is negative.')
-# for i in range(len(edit_costs_new)):
-# if edit_costs_new[i] < 0:
-# edit_costs_new[i] = 0
-
- # compute new GEDs and numbers of edit operations.
- params_ged['edit_cost_constant'] = edit_costs_new # np.array([edit_costs_new[0], edit_costs_new[1], 0.75])
- ged_vec, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
- parallel=parallel)
- residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
- time_list.append(time.time() - time0)
- edit_cost_list.append(edit_costs_new)
- nb_cost_mat = np.array(n_edit_operations)
- nb_cost_mat_list.append(nb_cost_mat)
- print('edit_costs:', edit_costs_new)
- print('residual_list:', residual_list)
-
- return edit_costs_new, residual_list, edit_cost_list, dis_k_mat, ged_mat, \
- time_list, nb_cost_mat_list
-
-
-def compute_geds(Gn, params_ged, parallel=False):
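-    """Compute pairwise GEDs over Gn and count the edit operations of each
-    returned mapping; the loop runs over the strict upper triangle of the
-    pair matrix and can be parallelized over all CPU cores.
-    """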
- edit_cost_name = params_ged['cost']
- if edit_cost_name == 'LETTER' or edit_cost_name == 'LETTER2':
- get_nb_eo = get_nb_edit_operations_letter
- elif edit_cost_name == 'NON_SYMBOLIC':
- get_nb_eo = get_nb_edit_operations_nonsymbolic
- else:
- get_nb_eo = get_nb_edit_operations
- ged_mat = np.zeros((len(Gn), len(Gn)))
- if parallel:
-# print('parallel')
-# len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
- len_itr = int(len(Gn) * (len(Gn) - 1) / 2)
- ged_vec = [0 for i in range(len_itr)]
- n_edit_operations = [0 for i in range(len_itr)]
-# itr = combinations_with_replacement(range(0, len(Gn)), 2)
- itr = combinations(range(0, len(Gn)), 2)
- n_jobs = multiprocessing.cpu_count()
- if len_itr < 100 * n_jobs:
- chunksize = int(len_itr / n_jobs) + 1
- else:
- chunksize = 100
- def init_worker(gn_toshare):
- global G_gn
- G_gn = gn_toshare
- do_partial = partial(_wrapper_compute_ged_parallel, params_ged, get_nb_eo)
- pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn,))
- iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
- desc='computing GEDs', file=sys.stdout)
-# iterator = pool.imap_unordered(do_partial, itr, chunksize)
- for i, j, dis, n_eo_tmp in iterator:
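-            # flat index of the pair (i, j), i < j, in the row-major strict
-            # upper triangle of the distance matrix.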
- idx_itr = int(len(Gn) * i + j - (i + 1) * (i + 2) / 2)
- ged_vec[idx_itr] = dis
- ged_mat[i][j] = dis
- ged_mat[j][i] = dis
- n_edit_operations[idx_itr] = n_eo_tmp
-# print('\n-------------------------------------------')
-# print(i, j, idx_itr, dis)
- pool.close()
- pool.join()
-
- else:
- ged_vec = []
- n_edit_operations = []
- for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
-# for i in range(len(Gn)):
- for j in range(i + 1, len(Gn)):
- dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], **params_ged)
- ged_vec.append(dis)
- ged_mat[i][j] = dis
- ged_mat[j][i] = dis
- n_eo_tmp = get_nb_eo(Gn[i], Gn[j], pi_forward, pi_backward)
- n_edit_operations.append(n_eo_tmp)
-
- return ged_vec, ged_mat, n_edit_operations
-
-
-def _wrapper_compute_ged_parallel(params_ged, get_nb_eo, itr):
- i = itr[0]
- j = itr[1]
- dis, n_eo_tmp = _compute_ged_parallel(G_gn[i], G_gn[j], params_ged, get_nb_eo)
- return i, j, dis, n_eo_tmp
-
-
-def _compute_ged_parallel(g1, g2, params_ged, get_nb_eo):
- dis, pi_forward, pi_backward = GED(g1, g2, **params_ged)
- n_eo_tmp = get_nb_eo(g1, g2, pi_forward, pi_backward) # [0,0,0,0,0,0]
- return dis, n_eo_tmp
-
-
-def update_costs(nb_cost_mat, dis_k_vec, dataset='monoterpenoides',
- cost='CONSTANT', rw_constraints='inequality'):
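-    """Refit the edit cost constants by constrained least squares (cvxpy):
-    minimize ||nb_cost_mat @ x - dis_k_vec||^2 subject to non-negativity
-    and, depending on the cost model and rw_constraints, triangle-type
-    constraints such as c_vs <= c_vi + c_vr.
-    """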
-# if dataset == 'Letter-high':
- if cost == 'LETTER':
- pass
-# # method 1: set alpha automatically, just tune c_vir and c_eir by
-# # LMS using cvxpy.
-# alpha = 0.5
-# coeff = 100 # np.max(alpha * nb_cost_mat[:,4] / dis_k_vec)
-## if np.count_nonzero(nb_cost_mat[:,4]) == 0:
-## alpha = 0.75
-## else:
-## alpha = np.min([dis_k_vec / c_vs for c_vs in nb_cost_mat[:,4] if c_vs != 0])
-## alpha = alpha * 0.99
-# param_vir = alpha * (nb_cost_mat[:,0] + nb_cost_mat[:,1])
-# param_eir = (1 - alpha) * (nb_cost_mat[:,4] + nb_cost_mat[:,5])
-# nb_cost_mat_new = np.column_stack((param_vir, param_eir))
-# dis_new = coeff * dis_k_vec - alpha * nb_cost_mat[:,3]
-#
-# x = cp.Variable(nb_cost_mat_new.shape[1])
-# cost = cp.sum_squares(nb_cost_mat_new * x - dis_new)
-# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
-# prob = cp.Problem(cp.Minimize(cost), constraints)
-# prob.solve()
-# edit_costs_new = x.value
-# edit_costs_new = np.array([edit_costs_new[0], edit_costs_new[1], alpha])
-# residual = np.sqrt(prob.value)
-
-# # method 2: tune c_vir, c_eir and alpha by nonlinear programming by
-# # scipy.optimize.minimize.
-# w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
-# w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
-# w2 = nb_cost_mat[:,3]
-# w3 = dis_k_vec
-# func_min = lambda x: np.sum((w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
-# + w2 * x[2] - w3 * x[3]) ** 2)
-# bounds = ((0, None), (0., None), (0.5, 0.5), (0, None))
-# res = minimize(func_min, [0.9, 1.7, 0.75, 10], bounds=bounds)
-# edit_costs_new = res.x[0:3]
-# residual = res.fun
-
- # method 3: tune c_vir, c_eir and alpha by nonlinear programming using cvxpy.
-
-
-# # method 4: tune c_vir, c_eir and alpha by QP function
-# # scipy.optimize.least_squares. An initial guess is required.
-# w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
-# w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
-# w2 = nb_cost_mat[:,3]
-# w3 = dis_k_vec
-# func = lambda x: (w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
-# + w2 * x[2] - w3 * x[3]) ** 2
-# res = optimize.root(func, [0.9, 1.7, 0.75, 100])
-# edit_costs_new = res.x
-# residual = None
- elif cost == 'LETTER2':
-# # 1. if c_vi != c_vr, c_ei != c_er.
-# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
-# x = cp.Variable(nb_cost_mat_new.shape[1])
-# cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-## # 1.1 no constraints.
-## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
-# # 1.2 c_vs <= c_vi + c_vr.
-# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
-# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
-## # 2. if c_vi == c_vr, c_ei == c_er.
-## nb_cost_mat_new = nb_cost_mat[:,[0,3,4]]
-## nb_cost_mat_new[:,0] += nb_cost_mat[:,1]
-## nb_cost_mat_new[:,2] += nb_cost_mat[:,5]
-## x = cp.Variable(nb_cost_mat_new.shape[1])
-## cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-## # 2.1 no constraints.
-## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
-### # 2.2 c_vs <= c_vi + c_vr.
-### constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
-### np.array([2.0, -1.0, 0.0]).T@x >= 0.0]
-#
-# prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-# prob.solve()
-# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
-# edit_costs_new = np.array(edit_costs_new)
-# residual = np.sqrt(prob.value)
- if rw_constraints == 'inequality':
- # c_vs <= c_vi + c_vr.
- nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
- constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
- np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- try:
- prob.solve(verbose=True)
- except MemoryError as error0:
- print('\nUsing solver "OSQP" caused a memory error.')
- print('the original error message is\n', error0)
- print('solver status: ', prob.status)
- print('trying solver "CVXOPT" instead...\n')
- try:
- prob.solve(solver=cp.CVXOPT, verbose=True)
- except Exception as error1:
-                    print('\nAn error occurred when using solver "CVXOPT".')
-                    print('the original error message is\n', error1)
-                    print('solver status: ', prob.status)
-                    print('trying solver "MOSEK" instead. Notice this solver is commercial and a license is required.\n')
- prob.solve(solver=cp.MOSEK, verbose=True)
- else:
- print('solver status: ', prob.status)
- else:
- print('solver status: ', prob.status)
- print()
- edit_costs_new = x.value
- residual = np.sqrt(prob.value)
- elif rw_constraints == '2constraints':
- # c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er.
- nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
- constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
- np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0,
- np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0,
- np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- edit_costs_new = x.value
- residual = np.sqrt(prob.value)
- elif rw_constraints == 'no-constraint':
- # no constraint.
- nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
- constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- edit_costs_new = x.value
- residual = np.sqrt(prob.value)
-# elif method == 'inequality_modified':
-# # c_vs <= c_vi + c_vr.
-# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
-# x = cp.Variable(nb_cost_mat_new.shape[1])
-# cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
-# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
-# prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-# prob.solve()
-# # use same costs for insertion and removal rather than the fitted costs.
-# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
-# edit_costs_new = np.array(edit_costs_new)
-# residual = np.sqrt(prob.value)
- elif cost == 'NON_SYMBOLIC':
- is_n_attr = np.count_nonzero(nb_cost_mat[:,2])
- is_e_attr = np.count_nonzero(nb_cost_mat[:,5])
-
- if dataset == 'SYNTHETICnew':
-# nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
- nb_cost_mat_new = nb_cost_mat[:,[2,3,4]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
-# np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
-# constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])]]
- constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])],
- np.array([0.0, 1.0, -1.0]).T@x == 0.0]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
-# print(x.value)
- edit_costs_new = np.concatenate((np.array([0.0, 0.0]), x.value,
- np.array([0.0])))
- residual = np.sqrt(prob.value)
-
- elif rw_constraints == 'inequality':
- # c_vs <= c_vi + c_vr.
- if is_n_attr and is_e_attr:
- nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
- constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
- np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
- np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- edit_costs_new = x.value
- residual = np.sqrt(prob.value)
- elif is_n_attr and not is_e_attr:
- nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
- constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
- np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- print(x.value)
- edit_costs_new = np.concatenate((x.value, np.array([0.0])))
- residual = np.sqrt(prob.value)
- elif not is_n_attr and is_e_attr:
- nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
- constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
- np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
- residual = np.sqrt(prob.value)
- else:
- nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
- constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]),
- x.value[2:], np.array([0.0])))
- residual = np.sqrt(prob.value)
- else:
-# # method 1: simple least square method.
-# edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
-# rcond=None)
-
-# # method 2: least square method with x_i >= 0.
-# edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec)
-
- # method 3: solve as a quadratic program with constraints.
-# P = np.dot(nb_cost_mat.T, nb_cost_mat)
-# q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat)
-# G = -1 * np.identity(nb_cost_mat.shape[1])
-# h = np.array([0 for i in range(nb_cost_mat.shape[1])])
-# A = np.array([1 for i in range(nb_cost_mat.shape[1])])
-# b = 1
-# x = cp.Variable(nb_cost_mat.shape[1])
-# prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x),
-# [G@x <= h])
-# prob.solve()
-# edit_costs_new = x.value
-# residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec)
-
-# G = -1 * np.identity(nb_cost_mat.shape[1])
-# h = np.array([0 for i in range(nb_cost_mat.shape[1])])
- x = cp.Variable(nb_cost_mat.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
- constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
- # np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
- np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
- np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- edit_costs_new = x.value
- residual = np.sqrt(prob.value)
-
- # method 4:
-
- return edit_costs_new, residual
-
-
-if __name__ == '__main__':
- print('check test_fitDistance.py')
\ No newline at end of file
diff --git a/gklearn/preimage/ged.py b/gklearn/preimage/ged.py
deleted file mode 100644
index a66baaf..0000000
--- a/gklearn/preimage/ged.py
+++ /dev/null
@@ -1,467 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Oct 17 18:44:59 2019
-
-@author: ljia
-"""
-import numpy as np
-import networkx as nx
-from tqdm import tqdm
-import sys
-import multiprocessing
-from multiprocessing import Pool
-from functools import partial
-
-#from gedlibpy_linlin import librariesImport, gedlibpy
-from gklearn.gedlib import librariesImport, gedlibpy
-
-def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method='IPFP',
- edit_cost_constant=[], algo_options='', stabilizer='min', repeat=50):
- """
-    Compute the GED between two graphs; return the distance and the forward
-    and backward node maps.
- """
-
-# dataset = dataset.lower()
-
- if lib == 'gedlibpy':
- gedlibpy.restart_env()
- gedlibpy.add_nx_graph(convertGraph(g1, cost), "")
- gedlibpy.add_nx_graph(convertGraph(g2, cost), "")
-
- listID = gedlibpy.get_all_graph_ids()
- gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
- gedlibpy.init()
- gedlibpy.set_method(method, algo_options)
- gedlibpy.init_method()
-
- g = listID[0]
- h = listID[1]
- if stabilizer is None:
- gedlibpy.run_method(g, h)
- pi_forward = gedlibpy.get_forward_map(g, h)
- pi_backward = gedlibpy.get_backward_map(g, h)
- upper = gedlibpy.get_upper_bound(g, h)
- lower = gedlibpy.get_lower_bound(g, h)
- elif stabilizer == 'mean':
- # @todo: to be finished...
- upper_list = [np.inf] * repeat
- for itr in range(repeat):
- gedlibpy.run_method(g, h)
- upper_list[itr] = gedlibpy.get_upper_bound(g, h)
- pi_forward = gedlibpy.get_forward_map(g, h)
- pi_backward = gedlibpy.get_backward_map(g, h)
- lower = gedlibpy.get_lower_bound(g, h)
- upper = np.mean(upper_list)
- elif stabilizer == 'median':
- if repeat % 2 == 0:
- repeat += 1
- upper_list = [np.inf] * repeat
- pi_forward_list = [0] * repeat
- pi_backward_list = [0] * repeat
- for itr in range(repeat):
- gedlibpy.run_method(g, h)
- upper_list[itr] = gedlibpy.get_upper_bound(g, h)
- pi_forward_list[itr] = gedlibpy.get_forward_map(g, h)
- pi_backward_list[itr] = gedlibpy.get_backward_map(g, h)
- lower = gedlibpy.get_lower_bound(g, h)
- upper = np.median(upper_list)
- idx_median = upper_list.index(upper)
- pi_forward = pi_forward_list[idx_median]
- pi_backward = pi_backward_list[idx_median]
- elif stabilizer == 'min':
- upper = np.inf
- for itr in range(repeat):
- gedlibpy.run_method(g, h)
- upper_tmp = gedlibpy.get_upper_bound(g, h)
- if upper_tmp < upper:
- upper = upper_tmp
- pi_forward = gedlibpy.get_forward_map(g, h)
- pi_backward = gedlibpy.get_backward_map(g, h)
- lower = gedlibpy.get_lower_bound(g, h)
- if upper == 0:
- break
- elif stabilizer == 'max':
- upper = 0
- for itr in range(repeat):
- gedlibpy.run_method(g, h)
- upper_tmp = gedlibpy.get_upper_bound(g, h)
- if upper_tmp > upper:
- upper = upper_tmp
- pi_forward = gedlibpy.get_forward_map(g, h)
- pi_backward = gedlibpy.get_backward_map(g, h)
- lower = gedlibpy.get_lower_bound(g, h)
- elif stabilizer == 'gaussian':
- pass
-
- dis = upper
-
- elif lib == 'gedlib-bash':
- import time
- import random
- import os
- from gklearn.utils.graphfiles import saveDataset
-
- tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/'
- if not os.path.exists(tmp_dir):
- os.makedirs(tmp_dir)
- fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9))
- xparams = {'method': 'gedlib', 'graph_dir': fn_collection}
- saveDataset([g1, g2], ['dummy', 'dummy'], gformat='gxl', group='xml',
- filename=fn_collection, xparams=xparams)
-
- command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n'
- command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
- command += 'export LD_LIBRARY_PATH\n'
- command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n'
- command += './ged_for_python_bash monoterpenoides ' + fn_collection \
- + ' \'' + algo_options + '\' '
- for ec in edit_cost_constant:
- command += str(ec) + ' '
-# output = os.system(command)
- stream = os.popen(command)
- output = stream.readlines()
-# print(output)
-
- dis = float(output[0].strip())
- runtime = float(output[1].strip())
- size_forward = int(output[2].strip())
- pi_forward = [int(item.strip()) for item in output[3:3+size_forward]]
- pi_backward = [int(item.strip()) for item in output[3+size_forward:]]
-
-# print(dis)
-# print(runtime)
-# print(size_forward)
-# print(pi_forward)
-# print(pi_backward)
-
-
-    # convert the node maps from indices to node labels (a removed node maps to np.inf).
- nodes1 = [n for n in g1.nodes()]
- nodes2 = [n for n in g2.nodes()]
- nb1 = nx.number_of_nodes(g1)
- nb2 = nx.number_of_nodes(g2)
- pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
- pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
-# print(pi_forward)
-
-
- return dis, pi_forward, pi_backward
-
-
-def convertGraph(G, cost):
- """Convert a graph to the proper NetworkX format that can be
- recognized by library gedlibpy.
- """
- G_new = nx.Graph()
- if cost == 'LETTER' or cost == 'LETTER2':
- for nd, attrs in G.nodes(data=True):
- G_new.add_node(str(nd), x=str(attrs['attributes'][0]),
- y=str(attrs['attributes'][1]))
- for nd1, nd2, attrs in G.edges(data=True):
- G_new.add_edge(str(nd1), str(nd2))
- elif cost == 'NON_SYMBOLIC':
- for nd, attrs in G.nodes(data=True):
- G_new.add_node(str(nd))
- for a_name in G.graph['node_attrs']:
- G_new.nodes[str(nd)][a_name] = str(attrs[a_name])
- for nd1, nd2, attrs in G.edges(data=True):
- G_new.add_edge(str(nd1), str(nd2))
- for a_name in G.graph['edge_attrs']:
- G_new.edges[str(nd1), str(nd2)][a_name] = str(attrs[a_name])
- else:
- for nd, attrs in G.nodes(data=True):
- G_new.add_node(str(nd), chem=attrs['atom'])
- for nd1, nd2, attrs in G.edges(data=True):
- G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
-# G_new.add_edge(str(nd1), str(nd2))
-
- return G_new
-
-
-def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP',
- edit_cost_constant=[], stabilizer='min', repeat=50):
- """
- Compute GEDs for a group of graphs.
- """
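-    # @todo: unfinished; the body below still refers to g1 and g2 from GED()
-    # and does not iterate over Gn, so calling it raises NameError as written.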
- if lib == 'gedlibpy':
- def convertGraph(G):
- """Convert a graph to the proper NetworkX format that can be
- recognized by library gedlibpy.
- """
- G_new = nx.Graph()
- for nd, attrs in G.nodes(data=True):
- G_new.add_node(str(nd), chem=attrs['atom'])
- for nd1, nd2, attrs in G.edges(data=True):
-# G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
- G_new.add_edge(str(nd1), str(nd2))
-
- return G_new
-
- gedlibpy.restart_env()
- gedlibpy.add_nx_graph(convertGraph(g1), "")
- gedlibpy.add_nx_graph(convertGraph(g2), "")
-
- listID = gedlibpy.get_all_graph_ids()
- gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
- gedlibpy.init()
- gedlibpy.set_method(method, "")
- gedlibpy.init_method()
-
- g = listID[0]
- h = listID[1]
- if stabilizer is None:
- gedlibpy.run_method(g, h)
- pi_forward = gedlibpy.get_forward_map(g, h)
- pi_backward = gedlibpy.get_backward_map(g, h)
- upper = gedlibpy.get_upper_bound(g, h)
- lower = gedlibpy.get_lower_bound(g, h)
- elif stabilizer == 'min':
- upper = np.inf
- for itr in range(repeat):
- gedlibpy.run_method(g, h)
- upper_tmp = gedlibpy.get_upper_bound(g, h)
- if upper_tmp < upper:
- upper = upper_tmp
- pi_forward = gedlibpy.get_forward_map(g, h)
- pi_backward = gedlibpy.get_backward_map(g, h)
- lower = gedlibpy.get_lower_bound(g, h)
- if upper == 0:
- break
-
- dis = upper
-
-        # convert the node maps from indices to node labels (a removed node maps to np.inf).
- nodes1 = [n for n in g1.nodes()]
- nodes2 = [n for n in g2.nodes()]
- nb1 = nx.number_of_nodes(g1)
- nb2 = nx.number_of_nodes(g2)
- pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
- pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
-
- return dis, pi_forward, pi_backward
-
-
-def ged_median(Gn, Gn_median, verbose=False, params_ged={'lib': 'gedlibpy',
- 'cost': 'CHEM_1', 'method': 'IPFP', 'edit_cost_constant': [],
- 'algo_options': '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1',
- 'stabilizer': None}, parallel=False):
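-    """For each graph in Gn, compute the sum of GEDs to all graphs in
-    Gn_median, together with the forward node maps.
-    """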
- if parallel:
- len_itr = int(len(Gn))
- pi_forward_list = [[] for i in range(len_itr)]
- dis_list = [0 for i in range(len_itr)]
-
- itr = range(0, len_itr)
- n_jobs = multiprocessing.cpu_count()
- if len_itr < 100 * n_jobs:
- chunksize = int(len_itr / n_jobs) + 1
- else:
- chunksize = 100
- def init_worker(gn_toshare, gn_median_toshare):
- global G_gn, G_gn_median
- G_gn = gn_toshare
- G_gn_median = gn_median_toshare
- do_partial = partial(_compute_ged_median, params_ged)
- pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn, Gn_median))
- if verbose:
- iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
- desc='computing GEDs', file=sys.stdout)
- else:
- iterator = pool.imap_unordered(do_partial, itr, chunksize)
- for i, dis_sum, pi_forward in iterator:
- pi_forward_list[i] = pi_forward
- dis_list[i] = dis_sum
-# print('\n-------------------------------------------')
-# print(i, j, idx_itr, dis)
- pool.close()
- pool.join()
-
- else:
- dis_list = []
- pi_forward_list = []
- for idx, G in tqdm(enumerate(Gn), desc='computing median distances',
- file=sys.stdout) if verbose else enumerate(Gn):
- dis_sum = 0
- pi_forward_list.append([])
- for G_p in Gn_median:
- dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p,
- **params_ged)
- pi_forward_list[idx].append(pi_tmp_forward)
- dis_sum += dis_tmp
- dis_list.append(dis_sum)
-
- return dis_list, pi_forward_list
-
-
-def _compute_ged_median(params_ged, itr):
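-    """Worker for the parallel pool: sum of GEDs from G_gn[itr] to every
-    graph in G_gn_median (globals shared via the pool initializer).
-    """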
-# print(itr)
- dis_sum = 0
- pi_forward = []
- for G_p in G_gn_median:
- dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G_gn[itr], G_p,
- **params_ged)
- pi_forward.append(pi_tmp_forward)
- dis_sum += dis_tmp
-
- return itr, dis_sum, pi_forward
-
-
-def get_nb_edit_operations(g1, g2, forward_map, backward_map):
-    """Compute the number of each type of edit operation.
- """
- n_vi = 0
- n_vr = 0
- n_vs = 0
- n_ei = 0
- n_er = 0
- n_es = 0
-
- nodes1 = [n for n in g1.nodes()]
- for i, map_i in enumerate(forward_map):
- if map_i == np.inf:
- n_vr += 1
-        elif g1.nodes[nodes1[i]]['atom'] != g2.nodes[map_i]['atom']:
- n_vs += 1
- for map_i in backward_map:
- if map_i == np.inf:
- n_vi += 1
-
-# idx_nodes1 = range(0, len(node1))
-
- edges1 = [e for e in g1.edges()]
- nb_edges2_cnted = 0
- for n1, n2 in edges1:
- idx1 = nodes1.index(n1)
- idx2 = nodes1.index(n2)
- # one of the nodes is removed, thus the edge is removed.
- if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf:
- n_er += 1
- # corresponding edge is in g2.
- elif (forward_map[idx1], forward_map[idx2]) in g2.edges():
- nb_edges2_cnted += 1
- # edge labels are different.
-            if g2.edges[forward_map[idx1], forward_map[idx2]]['bond_type'] \
-                != g1.edges[(n1, n2)]['bond_type']:
- n_es += 1
- elif (forward_map[idx2], forward_map[idx1]) in g2.edges():
- nb_edges2_cnted += 1
- # edge labels are different.
-            if g2.edges[forward_map[idx2], forward_map[idx1]]['bond_type'] \
-                != g1.edges[(n1, n2)]['bond_type']:
- n_es += 1
- # corresponding nodes are in g2, however the edge is removed.
- else:
- n_er += 1
- n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
-
- return n_vi, n_vr, n_vs, n_ei, n_er, n_es
-
-
-def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
-    """Compute the number of each type of edit operation.
- """
- n_vi = 0
- n_vr = 0
- n_vs = 0
- sod_vs = 0
- n_ei = 0
- n_er = 0
-
- nodes1 = [n for n in g1.nodes()]
- for i, map_i in enumerate(forward_map):
- if map_i == np.inf:
- n_vr += 1
- else:
- n_vs += 1
- diff_x = float(g1.nodes[nodes1[i]]['x']) - float(g2.nodes[map_i]['x'])
- diff_y = float(g1.nodes[nodes1[i]]['y']) - float(g2.nodes[map_i]['y'])
- sod_vs += np.sqrt(np.square(diff_x) + np.square(diff_y))
- for map_i in backward_map:
- if map_i == np.inf:
- n_vi += 1
-
-# idx_nodes1 = range(0, len(node1))
-
- edges1 = [e for e in g1.edges()]
- nb_edges2_cnted = 0
- for n1, n2 in edges1:
- idx1 = nodes1.index(n1)
- idx2 = nodes1.index(n2)
- # one of the nodes is removed, thus the edge is removed.
- if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf:
- n_er += 1
- # corresponding edge is in g2. Edge label is not considered.
- elif (forward_map[idx1], forward_map[idx2]) in g2.edges() or \
- (forward_map[idx2], forward_map[idx1]) in g2.edges():
- nb_edges2_cnted += 1
- # corresponding nodes are in g2, however the edge is removed.
- else:
- n_er += 1
- n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
-
- return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er
-
-
-def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
-    """Compute the number of each type of edit operation.
- """
- n_vi = 0
- n_vr = 0
- n_vs = 0
- sod_vs = 0
- n_ei = 0
- n_er = 0
- n_es = 0
- sod_es = 0
-
- nodes1 = [n for n in g1.nodes()]
- for i, map_i in enumerate(forward_map):
- if map_i == np.inf:
- n_vr += 1
- else:
- n_vs += 1
- sum_squares = 0
- for a_name in g1.graph['node_attrs']:
- diff = float(g1.nodes[nodes1[i]][a_name]) - float(g2.nodes[map_i][a_name])
- sum_squares += np.square(diff)
- sod_vs += np.sqrt(sum_squares)
- for map_i in backward_map:
- if map_i == np.inf:
- n_vi += 1
-
-# idx_nodes1 = range(0, len(node1))
-
- edges1 = [e for e in g1.edges()]
- for n1, n2 in edges1:
- idx1 = nodes1.index(n1)
- idx2 = nodes1.index(n2)
- n1_g2 = forward_map[idx1]
- n2_g2 = forward_map[idx2]
- # one of the nodes is removed, thus the edge is removed.
- if n1_g2 == np.inf or n2_g2 == np.inf:
- n_er += 1
- # corresponding edge is in g2.
- elif (n1_g2, n2_g2) in g2.edges():
- n_es += 1
- sum_squares = 0
- for a_name in g1.graph['edge_attrs']:
-                diff = float(g1.edges[n1, n2][a_name]) - float(g2.edges[n1_g2, n2_g2][a_name])
- sum_squares += np.square(diff)
- sod_es += np.sqrt(sum_squares)
- elif (n2_g2, n1_g2) in g2.edges():
- n_es += 1
- sum_squares = 0
- for a_name in g1.graph['edge_attrs']:
-                diff = float(g1.edges[n2, n1][a_name]) - float(g2.edges[n2_g2, n1_g2][a_name])
- sum_squares += np.square(diff)
- sod_es += np.sqrt(sum_squares)
- # corresponding nodes are in g2, however the edge is removed.
- else:
- n_er += 1
- n_ei = nx.number_of_edges(g2) - n_es
-
- return n_vi, n_vr, sod_vs, n_ei, n_er, sod_es
-
-
-if __name__ == '__main__':
- print('check test_ged.py')
\ No newline at end of file
diff --git a/gklearn/preimage/iam.py b/gklearn/preimage/iam.py
deleted file mode 100644
index f3e2165..0000000
--- a/gklearn/preimage/iam.py
+++ /dev/null
@@ -1,775 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Apr 26 11:49:12 2019
-
-Iterative alternate minimizations using GED.
-@author: ljia
-"""
-import numpy as np
-import random
-import networkx as nx
-from tqdm import tqdm
-
-from gklearn.utils.graphdataset import get_dataset_attributes
-from gklearn.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels
-from gklearn.preimage.ged import GED, ged_median
-
-
-def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
- epsilon=0.001, node_label='atom', edge_label='bond_type',
- connected=False, removeNodes=True, allBestInit=False, allBestNodes=False,
- allBestEdges=False, allBestOutput=False,
- params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 'method': 'IPFP',
- 'edit_cost_constant': [], 'stabilizer': None,
- 'algo_options': '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'}):
-    """Compute a generalized median graph of Gn_median by iterative
-    alternate minimization (IAM), starting from candidates in Gn_candidate.
-    """
-# Gn_median = Gn_median[0:10]
-# Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median]
- node_ir = np.inf # corresponding to the node remove and insertion.
-    label_r = 'thanksdanny' # the label used to mark node removal. # @todo: make sure this label cannot collide with a real label.
- ds_attrs = get_dataset_attributes(Gn_median + Gn_candidate,
- attr_names=['edge_labeled', 'node_attr_dim', 'edge_attr_dim'],
- edge_label=edge_label)
- node_label_set = get_node_labels(Gn_median, node_label)
- edge_label_set = get_edge_labels(Gn_median, edge_label)
-
-
- def generate_graph(G, pi_p_forward):
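-        """One IAM update step: relabel each node of G by (cost-weighted)
-        majority vote over its images in Gn_median under pi_p_forward, then
-        update edge presence and edge labels the same way; return the
-        generated graphs with their node maps and SODs.
-        """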
- G_new_list = [G.copy()] # all "best" graphs generated in this iteration.
-# nx.draw_networkx(G)
-# import matplotlib.pyplot as plt
-# plt.show()
-# print(pi_p_forward)
-
- # update vertex labels.
- # pre-compute h_i0 for each label.
-# for label in get_node_labels(Gn, node_label):
-# print(label)
-# for nd in G.nodes(data=True):
-# pass
- if not ds_attrs['node_attr_dim']: # labels are symbolic
- for ndi, (nd, _) in enumerate(G.nodes(data=True)):
- h_i0_list = []
- label_list = []
- for label in node_label_set:
- h_i0 = 0
- for idx, g in enumerate(Gn_median):
- pi_i = pi_p_forward[idx][ndi]
- if pi_i != node_ir and g.nodes[pi_i][node_label] == label:
- h_i0 += 1
- h_i0_list.append(h_i0)
- label_list.append(label)
- # case when the node is to be removed.
- if removeNodes:
- h_i0_remove = 0 # @todo: maybe this can be added to the node_label_set above.
- for idx, g in enumerate(Gn_median):
- pi_i = pi_p_forward[idx][ndi]
- if pi_i == node_ir:
- h_i0_remove += 1
- h_i0_list.append(h_i0_remove)
- label_list.append(label_r)
- # get the best labels.
- idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
- if allBestNodes: # choose all best graphs.
- nlabel_best = [label_list[idx] for idx in idx_max]
- # generate "best" graphs with regard to "best" node labels.
- G_new_list_nd = []
- for g in G_new_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now.
- for nl in nlabel_best:
- g_tmp = g.copy()
- if nl == label_r:
- g_tmp.remove_node(nd)
- else:
- g_tmp.nodes[nd][node_label] = nl
- G_new_list_nd.append(g_tmp)
- # nx.draw_networkx(g_tmp)
- # import matplotlib.pyplot as plt
- # plt.show()
- # print(g_tmp.nodes(data=True))
- # print(g_tmp.edges(data=True))
- G_new_list = [ggg.copy() for ggg in G_new_list_nd]
- else:
- # choose one of the best randomly.
- idx_rdm = random.randint(0, len(idx_max) - 1)
- best_label = label_list[idx_max[idx_rdm]]
- h_i0_max = h_i0_list[idx_max[idx_rdm]]
-
- g_new = G_new_list[0]
- if best_label == label_r:
- g_new.remove_node(nd)
- else:
- g_new.nodes[nd][node_label] = best_label
- G_new_list = [g_new]
- else: # labels are non-symbolic
- for ndi, (nd, _) in enumerate(G.nodes(data=True)):
- Si_norm = 0
- phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
- for idx, g in enumerate(Gn_median):
- pi_i = pi_p_forward[idx][ndi]
- if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
- Si_norm += 1
- phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
- phi_i_bar /= Si_norm
- G_new_list[0].nodes[nd]['attributes'] = phi_i_bar
-
-# for g in G_new_list:
-# import matplotlib.pyplot as plt
-# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
- # update edge labels and adjacency matrix.
- if ds_attrs['edge_labeled']:
- G_new_list_edge = []
- for g_new in G_new_list:
- nd_list = [n for n in g_new.nodes()]
- g_tmp_list = [g_new.copy()]
- for nd1i in range(nx.number_of_nodes(g_new)):
- nd1 = nd_list[nd1i]# @todo: not just edges, but all pairs of nodes
- for nd2i in range(nd1i + 1, nx.number_of_nodes(g_new)):
- nd2 = nd_list[nd2i]
-# for nd1, nd2, _ in g_new.edges(data=True):
- h_ij0_list = []
- label_list = []
- for label in edge_label_set:
- h_ij0 = 0
- for idx, g in enumerate(Gn_median):
- pi_i = pi_p_forward[idx][nd1i]
- pi_j = pi_p_forward[idx][nd2i]
- h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
- g.has_edge(pi_i, pi_j) and
- g.edges[pi_i, pi_j][edge_label] == label)
- h_ij0 += h_ij0_p
- h_ij0_list.append(h_ij0)
- label_list.append(label)
-
- # get the best labels.
- idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
- if allBestEdges: # choose all best graphs.
- elabel_best = [label_list[idx] for idx in idx_max]
- h_ij0_max = [h_ij0_list[idx] for idx in idx_max]
- # generate "best" graphs with regard to "best" node labels.
- G_new_list_ed = []
- for g_tmp in g_tmp_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now.
- for idxl, el in enumerate(elabel_best):
- g_tmp_copy = g_tmp.copy()
- # check whether a_ij is 0 or 1.
- sij_norm = 0
- for idx, g in enumerate(Gn_median):
- pi_i = pi_p_forward[idx][nd1i]
- pi_j = pi_p_forward[idx][nd2i]
- if g.has_node(pi_i) and g.has_node(pi_j) and \
- g.has_edge(pi_i, pi_j):
- sij_norm += 1
- if h_ij0_max[idxl] > len(Gn_median) * c_er / c_es + \
- sij_norm * (1 - (c_er + c_ei) / c_es):
- if not g_tmp_copy.has_edge(nd1, nd2):
- g_tmp_copy.add_edge(nd1, nd2)
- g_tmp_copy.edges[nd1, nd2][edge_label] = elabel_best[idxl]
- else:
- if g_tmp_copy.has_edge(nd1, nd2):
- g_tmp_copy.remove_edge(nd1, nd2)
- G_new_list_ed.append(g_tmp_copy)
- g_tmp_list = [ggg.copy() for ggg in G_new_list_ed]
- else: # choose one of the best randomly.
- idx_rdm = random.randint(0, len(idx_max) - 1)
- best_label = label_list[idx_max[idx_rdm]]
- h_ij0_max = h_ij0_list[idx_max[idx_rdm]]
-
- # check whether a_ij is 0 or 1.
- sij_norm = 0
- for idx, g in enumerate(Gn_median):
- pi_i = pi_p_forward[idx][nd1i]
- pi_j = pi_p_forward[idx][nd2i]
- if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
- sij_norm += 1
- if h_ij0_max > len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
- if not g_new.has_edge(nd1, nd2):
- g_new.add_edge(nd1, nd2)
- g_new.edges[nd1, nd2][edge_label] = best_label
- else:
-# elif h_ij0_max < len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
- if g_new.has_edge(nd1, nd2):
- g_new.remove_edge(nd1, nd2)
- g_tmp_list = [g_new]
- G_new_list_edge += g_tmp_list
- G_new_list = [ggg.copy() for ggg in G_new_list_edge]
-
-
- else: # if edges are unlabeled
- # @todo: is this even right? G or g_tmp? check if the new one is right
- # @todo: works only for undirected graphs.
-
- for g_tmp in G_new_list:
- nd_list = [n for n in g_tmp.nodes()]
- for nd1i in range(nx.number_of_nodes(g_tmp)):
- nd1 = nd_list[nd1i]
- for nd2i in range(nd1i + 1, nx.number_of_nodes(g_tmp)):
- nd2 = nd_list[nd2i]
- sij_norm = 0
- for idx, g in enumerate(Gn_median):
- pi_i = pi_p_forward[idx][nd1i]
- pi_j = pi_p_forward[idx][nd2i]
- if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
- sij_norm += 1
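-                        # keep (or insert) the edge iff it appears in more than
-                        # the cost-weighted fraction c_er / (c_er + c_ei) of the
-                        # median graphs.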
- if sij_norm > len(Gn_median) * c_er / (c_er + c_ei):
- # @todo: should we consider if nd1 and nd2 in g_tmp?
- # or just add the edge anyway?
- if g_tmp.has_node(nd1) and g_tmp.has_node(nd2) \
- and not g_tmp.has_edge(nd1, nd2):
- g_tmp.add_edge(nd1, nd2)
- else: # @todo: which to use?
-# elif sij_norm < len(Gn_median) * c_er / (c_er + c_ei):
- if g_tmp.has_edge(nd1, nd2):
- g_tmp.remove_edge(nd1, nd2)
- # do not change anything when equal.
-
-# for i, g in enumerate(G_new_list):
-# import matplotlib.pyplot as plt
-# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
-# # find the best graph generated in this iteration and update pi_p.
- # @todo: should we update all graphs generated or just the best ones?
- dis_list, pi_forward_list = ged_median(G_new_list, Gn_median,
- params_ged=params_ged)
- # @todo: should we remove the identical and connectivity check?
- # Don't know which is faster.
- if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
- G_new_list, idx_list = remove_duplicates(G_new_list)
- pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
- dis_list = [dis_list[idx] for idx in idx_list]
-# if connected == True:
-# G_new_list, idx_list = remove_disconnected(G_new_list)
-# pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
-# idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
-# dis_min = dis_list[idx_min_tmp_list[0]]
-# pi_forward_list = [pi_forward_list[idx] for idx in idx_min_list]
-# G_new_list = [G_new_list[idx] for idx in idx_min_list]
-
-# for g in G_new_list:
-# import matplotlib.pyplot as plt
-# nx.draw_networkx(g)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
- return G_new_list, pi_forward_list, dis_list
-
-
- def best_median_graphs(Gn_candidate, pi_all_forward, dis_all):
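-        """Return the candidate graphs, node maps and distance attaining the
-        minimal sum of distances.
-        """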
- idx_min_list = np.argwhere(dis_all == np.min(dis_all)).flatten().tolist()
- dis_min = dis_all[idx_min_list[0]]
- pi_forward_min_list = [pi_all_forward[idx] for idx in idx_min_list]
- G_min_list = [Gn_candidate[idx] for idx in idx_min_list]
- return G_min_list, pi_forward_min_list, dis_min
-
-
- def iteration_proc(G, pi_p_forward, cur_sod):
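-        """Alternately regenerate the graph and recompute the node maps until
-        the SOD change drops below epsilon or ite_max iterations are reached.
-        """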
- G_list = [G]
- pi_forward_list = [pi_p_forward]
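-        # start old_sod away from cur_sod so the convergence test admits a first pass.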
- old_sod = cur_sod * 2
- sod_list = [cur_sod]
- dis_list = [cur_sod]
- # iterations.
- itr = 0
- # @todo: what if difference == 0?
-# while itr < ite_max and (np.abs(old_sod - cur_sod) > epsilon or
-# np.abs(old_sod - cur_sod) == 0):
- while itr < ite_max and np.abs(old_sod - cur_sod) > epsilon:
-# while itr < ite_max:
-# for itr in range(0, 5): # the convergence condition?
- print('itr_iam is', itr)
- G_new_list = []
- pi_forward_new_list = []
- dis_new_list = []
- for idx, g in enumerate(G_list):
-# label_set = get_node_labels(Gn_median + [g], node_label)
- G_tmp_list, pi_forward_tmp_list, dis_tmp_list = generate_graph(
- g, pi_forward_list[idx])
- G_new_list += G_tmp_list
- pi_forward_new_list += pi_forward_tmp_list
- dis_new_list += dis_tmp_list
- # @todo: need to remove duplicates here?
- G_list = [ggg.copy() for ggg in G_new_list]
- pi_forward_list = [pitem.copy() for pitem in pi_forward_new_list]
- dis_list = dis_new_list[:]
-
- old_sod = cur_sod
- cur_sod = np.min(dis_list)
- sod_list.append(cur_sod)
-
- itr += 1
-
- # @todo: do we return all graphs or the best ones?
- # get the best ones of the generated graphs.
- G_list, pi_forward_list, dis_min = best_median_graphs(
- G_list, pi_forward_list, dis_list)
-
- if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
- G_list, idx_list = remove_duplicates(G_list)
- pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
-# dis_list = [dis_list[idx] for idx in idx_list]
-
-# import matplotlib.pyplot as plt
-# for g in G_list:
-# nx.draw_networkx(g)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
- print('\nsods:', sod_list, '\n')
-
- return G_list, pi_forward_list, dis_min, sod_list
-
-
- def remove_duplicates(Gn):
- """Remove duplicate graphs from list.
- """
- Gn_new = []
- idx_list = []
- for idx, g in enumerate(Gn):
- dupl = False
- for g_new in Gn_new:
- if graph_isIdentical(g_new, g):
- dupl = True
- break
- if not dupl:
- Gn_new.append(g)
- idx_list.append(idx)
- return Gn_new, idx_list
-
-
- def remove_disconnected(Gn):
- """Remove disconnected graphs from list.
- """
- Gn_new = []
- idx_list = []
- for idx, g in enumerate(Gn):
- if nx.is_connected(g):
- Gn_new.append(g)
- idx_list.append(idx)
- return Gn_new, idx_list
-
-
- ###########################################################################
-
-    # phase 1: initialize.
- # compute set-median.
- dis_min = np.inf
- dis_list, pi_forward_all = ged_median(Gn_candidate, Gn_median,
- params_ged=params_ged, parallel=True)
- print('finish computing GEDs.')
- # find all smallest distances.
- if allBestInit: # try all best init graphs.
- idx_min_list = range(len(dis_list))
- dis_min = dis_list
- else:
- idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
- dis_min = [dis_list[idx_min_list[0]]] * len(idx_min_list)
- idx_min_rdm = random.randint(0, len(idx_min_list) - 1)
- idx_min_list = [idx_min_list[idx_min_rdm]]
- sod_set_median = np.min(dis_min)
-
-
- # phase 2: iteration.
- G_list = []
- dis_list = []
- pi_forward_list = []
- G_set_median_list = []
-# sod_list = []
- for idx_tmp, idx_min in enumerate(idx_min_list):
-# print('idx_min is', idx_min)
- G = Gn_candidate[idx_min].copy()
- G_set_median_list.append(G.copy())
- # list of edit operations.
- pi_p_forward = pi_forward_all[idx_min]
-# pi_p_backward = pi_all_backward[idx_min]
- Gi_list, pi_i_forward_list, dis_i_min, sod_list = iteration_proc(G,
- pi_p_forward, dis_min[idx_tmp])
- G_list += Gi_list
- dis_list += [dis_i_min] * len(Gi_list)
- pi_forward_list += pi_i_forward_list
-
-
- if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
- G_list, idx_list = remove_duplicates(G_list)
- dis_list = [dis_list[idx] for idx in idx_list]
- pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
-    if connected:
- G_list_con, idx_list = remove_disconnected(G_list)
-        # if there are no connected graphs at all, keep the disconnected ones.
-        if len(G_list_con) > 0: # @todo: is this fallback the right behavior?
- G_list = G_list_con
- dis_list = [dis_list[idx] for idx in idx_list]
- pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
-
-# import matplotlib.pyplot as plt
-# for g in G_list:
-# nx.draw_networkx(g)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
- # get the best median graphs
- G_gen_median_list, pi_forward_min_list, sod_gen_median = best_median_graphs(
- G_list, pi_forward_list, dis_list)
-# for g in G_gen_median_list:
-# nx.draw_networkx(g)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
- if not allBestOutput:
- # randomly choose one graph.
- idx_rdm = random.randint(0, len(G_gen_median_list) - 1)
- G_gen_median_list = [G_gen_median_list[idx_rdm]]
-
- return G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median
-
-
-def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1,
- dataset='monoterpenoides',
- graph_dir=''):
-    """Compute the IAM by the C++ implementation (gedlib) through bash.
- """
- import os
- import time
-
- def createCollectionFile(Gn_names, y, filename):
- """Create collection file.
- """
- dirname_ds = os.path.dirname(filename)
- if dirname_ds != '':
- dirname_ds += '/'
-            if not os.path.exists(dirname_ds):
- os.makedirs(dirname_ds)
-
-        with open(filename + '.xml', 'w') as fgroup:
-            fgroup.write("<?xml version=\"1.0\"?>")
-            fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">")
-            fgroup.write("\n<GraphCollection>")
-            for idx, fname in enumerate(Gn_names):
-                fgroup.write("\n\t<graph file=\"" + fname + "\" class=\"" + str(y[idx]) + "\"/>")
-            fgroup.write("\n</GraphCollection>")
-
- tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/'
-    fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, int(1e9)))
- createCollectionFile(Gn_names, ['dummy'] * len(Gn_names), fn_collection)
-# fn_collection = tmp_dir + 'collection_for_debug'
-# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/gxl'
-
-# if dataset == 'Letter-high' or dataset == 'Fingerprint':
-# dataset = 'letter'
- command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/Linlin/gedlib\'\n'
- command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
- command += 'export LD_LIBRARY_PATH\n'
- command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n'
- command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \
- + ' \'' + graph_dir + '\' ' + ' ' + cost + ' ' + str(initial_solutions) + ' '
- if edit_cost_constant is None:
- command += 'None'
- else:
- for ec in edit_cost_constant:
- command += str(ec) + ' '
-# output = os.system(command)
- stream = os.popen(command)
-
- output = stream.readlines()
-# print(output)
- sod_sm = float(output[0].strip())
- sod_gm = float(output[1].strip())
-
- fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
- fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
-
- return sod_sm, sod_gm, fname_sm, fname_gm
-
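
A minimal usage sketch for iam_bash (the file names, costs, and directory below are hypothetical; it assumes the gedlib binary under cpp_ext/bin has been built):

    # Hypothetical call: GXL names and costs are illustrative only.
    sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(
        ['molecule_1.gxl', 'molecule_2.gxl', 'molecule_3.gxl'],
        [3, 3, 1, 3, 3, 1],  # the six edit cost constants, as used in knn.py
        graph_dir='datasets/monoterpenoides/gxl/')
    print(sod_sm, sod_gm)  # SODs of the set median and the generalized median
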
-
-
-###############################################################################
-# Old implementations.
-
-def iam(Gn, c_ei=3, c_er=3, c_es=1, node_label='atom', edge_label='bond_type',
- connected=True):
-    """Compute a median graph of Gn by iterative alternate minimization (IAM).
- """
-# Gn = Gn[0:10]
- Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]
-
-    # phase 1: initialize.
- # compute set-median.
- dis_min = np.inf
- pi_p = []
- pi_all = []
- for idx1, G_p in enumerate(Gn):
- dist_sum = 0
- pi_all.append([])
- for idx2, G_p_prime in enumerate(Gn):
- dist_tmp, pi_tmp, _ = GED(G_p, G_p_prime)
- pi_all[idx1].append(pi_tmp)
- dist_sum += dist_tmp
- if dist_sum < dis_min:
- dis_min = dist_sum
- G = G_p.copy()
- idx_min = idx1
- # list of edit operations.
- pi_p = pi_all[idx_min]
-
- # phase 2: iteration.
- ds_attrs = get_dataset_attributes(Gn, attr_names=['edge_labeled', 'node_attr_dim'],
- edge_label=edge_label)
- for itr in range(0, 10): # @todo: the convergence condition?
- G_new = G.copy()
- # update vertex labels.
- # pre-compute h_i0 for each label.
-# for label in get_node_labels(Gn, node_label):
-# print(label)
-# for nd in G.nodes(data=True):
-# pass
- if not ds_attrs['node_attr_dim']: # labels are symbolic
- for nd, _ in G.nodes(data=True):
- h_i0_list = []
- label_list = []
- for label in get_node_labels(Gn, node_label):
- h_i0 = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p[idx][nd]
- if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
- h_i0 += 1
- h_i0_list.append(h_i0)
- label_list.append(label)
- # choose one of the best randomly.
- idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
- idx_rdm = random.randint(0, len(idx_max) - 1)
- G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
- else: # labels are non-symbolic
- for nd, _ in G.nodes(data=True):
- Si_norm = 0
- phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
- for idx, g in enumerate(Gn):
- pi_i = pi_p[idx][nd]
- if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
- Si_norm += 1
- phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
- phi_i_bar /= Si_norm
- G_new.nodes[nd]['attributes'] = phi_i_bar
-
- # update edge labels and adjacency matrix.
- if ds_attrs['edge_labeled']:
- for nd1, nd2, _ in G.edges(data=True):
- h_ij0_list = []
- label_list = []
- for label in get_edge_labels(Gn, edge_label):
- h_ij0 = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p[idx][nd1]
- pi_j = pi_p[idx][nd2]
- h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
- g.has_edge(pi_i, pi_j) and
- g.edges[pi_i, pi_j][edge_label] == label)
- h_ij0 += h_ij0_p
- h_ij0_list.append(h_ij0)
- label_list.append(label)
- # choose one of the best randomly.
- idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
- h_ij0_max = h_ij0_list[idx_max[0]]
- idx_rdm = random.randint(0, len(idx_max) - 1)
- best_label = label_list[idx_max[idx_rdm]]
-
- # check whether a_ij is 0 or 1.
- sij_norm = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p[idx][nd1]
- pi_j = pi_p[idx][nd2]
- if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
- sij_norm += 1
- if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
- if not G_new.has_edge(nd1, nd2):
- G_new.add_edge(nd1, nd2)
- G_new.edges[nd1, nd2][edge_label] = best_label
- else:
- if G_new.has_edge(nd1, nd2):
- G_new.remove_edge(nd1, nd2)
- else: # if edges are unlabeled
- for nd1, nd2, _ in G.edges(data=True):
- sij_norm = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p[idx][nd1]
- pi_j = pi_p[idx][nd2]
- if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
- sij_norm += 1
- if sij_norm > len(Gn) * c_er / (c_er + c_ei):
- if not G_new.has_edge(nd1, nd2):
- G_new.add_edge(nd1, nd2)
- else:
- if G_new.has_edge(nd1, nd2):
- G_new.remove_edge(nd1, nd2)
-
- G = G_new.copy()
-
- # update pi_p
- pi_p = []
- for idx1, G_p in enumerate(Gn):
- dist_tmp, pi_tmp, _ = GED(G, G_p)
- pi_p.append(pi_tmp)
-
- return G
-
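
A quick numeric check of the labeled-edge decision rule in iam() above (a sketch with illustrative values): with the default costs c_ei = c_er = 3, c_es = 1, a collection of 10 graphs, and a node pair mapped onto existing edges in 6 of them, the threshold collapses to zero, so the edge is kept as soon as its best label occurs at least once.

    # Illustrative numbers; only the threshold formula comes from iam().
    c_ei, c_er, c_es = 3, 3, 1
    n_graphs, sij_norm = 10, 6
    threshold = n_graphs * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es)
    assert threshold == 0.0  # 30 + 6 * (1 - 6) = 0
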
-# --------------------------- These are tests --------------------------------#
-
-def test_iam_with_more_graphs_as_init(Gn, G_candidate, c_ei=3, c_er=3, c_es=1,
- node_label='atom', edge_label='bond_type'):
-    """IAM variant that selects the initial median from G_candidate and alternately updates it against the graphs in Gn.
- """
-# Gn = Gn[0:10]
- Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]
-
-    # phase 1: initialize.
- # compute set-median.
- dis_min = np.inf
-# pi_p = []
- pi_all_forward = []
- pi_all_backward = []
- for idx1, G_p in tqdm(enumerate(G_candidate), desc='computing GEDs', file=sys.stdout):
- dist_sum = 0
- pi_all_forward.append([])
- pi_all_backward.append([])
- for idx2, G_p_prime in enumerate(Gn):
- dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G_p, G_p_prime)
- pi_all_forward[idx1].append(pi_tmp_forward)
- pi_all_backward[idx1].append(pi_tmp_backward)
- dist_sum += dist_tmp
- if dist_sum <= dis_min:
- dis_min = dist_sum
- G = G_p.copy()
- idx_min = idx1
- # list of edit operations.
- pi_p_forward = pi_all_forward[idx_min]
- pi_p_backward = pi_all_backward[idx_min]
-
- # phase 2: iteration.
- ds_attrs = get_dataset_attributes(Gn + [G], attr_names=['edge_labeled', 'node_attr_dim'],
- edge_label=edge_label)
- label_set = get_node_labels(Gn + [G], node_label)
- for itr in range(0, 10): # @todo: the convergence condition?
- G_new = G.copy()
- # update vertex labels.
- # pre-compute h_i0 for each label.
-# for label in get_node_labels(Gn, node_label):
-# print(label)
-# for nd in G.nodes(data=True):
-# pass
- if not ds_attrs['node_attr_dim']: # labels are symbolic
- for nd in G.nodes():
- h_i0_list = []
- label_list = []
- for label in label_set:
- h_i0 = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p_forward[idx][nd]
- if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
- h_i0 += 1
- h_i0_list.append(h_i0)
- label_list.append(label)
- # choose one of the best randomly.
- idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
- idx_rdm = random.randint(0, len(idx_max) - 1)
- G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
- else: # labels are non-symbolic
- for nd in G.nodes():
- Si_norm = 0
- phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
- for idx, g in enumerate(Gn):
- pi_i = pi_p_forward[idx][nd]
- if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
- Si_norm += 1
- phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
- phi_i_bar /= Si_norm
- G_new.nodes[nd]['attributes'] = phi_i_bar
-
- # update edge labels and adjacency matrix.
- if ds_attrs['edge_labeled']:
- for nd1, nd2, _ in G.edges(data=True):
- h_ij0_list = []
- label_list = []
- for label in get_edge_labels(Gn, edge_label):
- h_ij0 = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p_forward[idx][nd1]
- pi_j = pi_p_forward[idx][nd2]
- h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
- g.has_edge(pi_i, pi_j) and
- g.edges[pi_i, pi_j][edge_label] == label)
- h_ij0 += h_ij0_p
- h_ij0_list.append(h_ij0)
- label_list.append(label)
- # choose one of the best randomly.
- idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
- h_ij0_max = h_ij0_list[idx_max[0]]
- idx_rdm = random.randint(0, len(idx_max) - 1)
- best_label = label_list[idx_max[idx_rdm]]
-
- # check whether a_ij is 0 or 1.
- sij_norm = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p_forward[idx][nd1]
- pi_j = pi_p_forward[idx][nd2]
- if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
- sij_norm += 1
- if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
- if not G_new.has_edge(nd1, nd2):
- G_new.add_edge(nd1, nd2)
- G_new.edges[nd1, nd2][edge_label] = best_label
- else:
- if G_new.has_edge(nd1, nd2):
- G_new.remove_edge(nd1, nd2)
- else: # if edges are unlabeled
- # @todo: works only for undirected graphs.
- for nd1 in range(nx.number_of_nodes(G)):
- for nd2 in range(nd1 + 1, nx.number_of_nodes(G)):
- sij_norm = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p_forward[idx][nd1]
- pi_j = pi_p_forward[idx][nd2]
- if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
- sij_norm += 1
- if sij_norm > len(Gn) * c_er / (c_er + c_ei):
- if not G_new.has_edge(nd1, nd2):
- G_new.add_edge(nd1, nd2)
- elif sij_norm < len(Gn) * c_er / (c_er + c_ei):
- if G_new.has_edge(nd1, nd2):
- G_new.remove_edge(nd1, nd2)
- # do not change anything when equal.
-
- G = G_new.copy()
-
- # update pi_p
- pi_p_forward = []
- for G_p in Gn:
- dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p)
- pi_p_forward.append(pi_tmp_forward)
-
- return G
-
-
-###############################################################################
-
-if __name__ == '__main__':
- from gklearn.utils.graphfiles import loadDataset
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
- 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb
-# ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
-# 'extra_params': {}} # node nsymb
-# ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
-# 'extra_params': {}}
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-
- iam(Gn)
\ No newline at end of file
diff --git a/gklearn/preimage/knn.py b/gklearn/preimage/knn.py
deleted file mode 100644
index c179287..0000000
--- a/gklearn/preimage/knn.py
+++ /dev/null
@@ -1,114 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Jan 10 13:22:04 2020
-
-@author: ljia
-"""
-import numpy as np
-#import matplotlib.pyplot as plt
-from tqdm import tqdm
-import random
-#import csv
-from shutil import copyfile
-import os
-
-from gklearn.preimage.iam import iam_bash
-from gklearn.utils.graphfiles import loadDataset, loadGXL
-from gklearn.preimage.ged import GED
-from gklearn.preimage.utils import get_same_item_indices
-
-def test_knn():
- ds = {'name': 'monoterpenoides',
- 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'])
-# Gn = Gn[0:50]
-# gkernel = 'treeletkernel'
-# node_label = 'atom'
-# edge_label = 'bond_type'
-# ds_name = 'mono'
- dir_output = 'results/knn/'
-    graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'
-
- k_nn = 1
- percent = 0.1
- repeats = 50
- edit_cost_constant = [3, 3, 1, 3, 3, 1]
-
- # get indices by classes.
- y_idx = get_same_item_indices(y_all)
-    accuracy_sm_list = []
-    accuracy_gm_list = []
-    sod_sm_list = []
-    sod_gm_list = []
- for repeat in range(0, repeats):
- print('\n---------------------------------')
- print('repeat =', repeat)
-
- random.seed(repeat)
- set_median_list = []
- gen_median_list = []
- train_y_set = []
- for y, values in y_idx.items():
- print('\ny =', y)
- size_median_set = int(len(values) * percent)
- median_set_idx = random.sample(values, size_median_set)
- print('median set: ', median_set_idx)
-
- # compute set median and gen median using IAM (C++ through bash).
- # Gn_median = [Gn[idx] for idx in median_set_idx]
- group_fnames = [Gn[g].graph['filename'] for g in median_set_idx]
- sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(group_fnames, edit_cost_constant,
- graph_dir=graph_dir)
- print('sod_sm, sod_gm:', sod_sm, sod_gm)
- sod_sm_list.append(sod_sm)
- sod_gm_list.append(sod_gm)
- fname_sm_new = dir_output + 'medians/set_median.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
- copyfile(fname_sm, fname_sm_new)
- fname_gm_new = dir_output + 'medians/gen_median.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
- copyfile(fname_gm, fname_gm_new)
- set_median_list.append(loadGXL(fname_sm_new))
- gen_median_list.append(loadGXL(fname_gm_new))
- train_y_set.append(int(y))
-
- print(sod_sm, sod_gm)
-
- # do 1-nn.
- test_y_set = [int(y) for y in y_all]
- accuracy_sm = knn(set_median_list, train_y_set, Gn, test_y_set, k=k_nn, distance='ged')
-        accuracy_gm = knn(gen_median_list, train_y_set, Gn, test_y_set, k=k_nn, distance='ged')
- accuracy_sm_list.append(accuracy_sm)
- accuracy_gm_list.append(accuracy_gm)
- print('current accuracy sm and gm:', accuracy_sm, accuracy_gm)
-
- # output
- accuracy_sm_mean = np.mean(accuracy_sm_list)
- accuracy_gm_mean = np.mean(accuracy_gm_list)
- print('\ntotal average accuracy sm and gm:', accuracy_sm_mean, accuracy_gm_mean)
-
-
-def knn(train_set, train_y_set, test_set, test_y_set, k=1, distance='ged'):
- if k == 1 and distance == 'ged':
- algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
- params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
- 'algo_options': algo_options, 'stabilizer': None}
- accuracy = 0
- for idx_test, g_test in tqdm(enumerate(test_set), desc='computing 1-nn',
- file=sys.stdout):
- dis = np.inf
- for idx_train, g_train in enumerate(train_set):
- dis_cur, _, _ = GED(g_test, g_train, **params_ged)
- if dis_cur < dis:
- dis = dis_cur
- test_y_cur = train_y_set[idx_train]
- if test_y_cur == test_y_set[idx_test]:
- accuracy += 1
- accuracy = accuracy / len(test_set)
-
- return accuracy
-
-
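
A usage sketch for knn() (assumes gedlibpy is importable and that the median and label lists were built as in test_knn() above):

    # Hypothetical: classify the whole dataset against the per-class set medians.
    acc = knn(set_median_list, train_y_set, Gn, [int(y) for y in y_all],
              k=1, distance='ged')
    print('1-nn accuracy against the set medians:', acc)
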
-
-if __name__ == '__main__':
- test_knn()
\ No newline at end of file
diff --git a/gklearn/preimage/libs.py b/gklearn/preimage/libs.py
deleted file mode 100644
index 76005c6..0000000
--- a/gklearn/preimage/libs.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import sys
-import pathlib
-
-# insert gedlibpy library.
-sys.path.insert(0, "../../../")
-from gedlibpy import librariesImport, gedlibpy
diff --git a/gklearn/preimage/median.py b/gklearn/preimage/median.py
deleted file mode 100644
index 1c5bb0f..0000000
--- a/gklearn/preimage/median.py
+++ /dev/null
@@ -1,218 +0,0 @@
-import sys
-sys.path.insert(0, "../")
-#import pathlib
-import numpy as np
-import networkx as nx
-import time
-
-from gedlibpy import librariesImport, gedlibpy
-#import script
-sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
-import gklearn
-from gklearn.utils.graphfiles import loadDataset
-
-def replace_graph_in_env(script, graph, old_id, label='median'):
- """
- Replace a graph in script
-
-    If old_id is -1, add a new graph to the environment.
-
- """
- if(old_id > -1):
- script.PyClearGraph(old_id)
- new_id = script.PyAddGraph(label)
- for i in graph.nodes():
-        script.PyAddNode(new_id,str(i),graph.node[i]) # !! strings are required by gedlib
- for e in graph.edges:
- script.PyAddEdge(new_id, str(e[0]),str(e[1]), {})
- script.PyInitEnv()
- script.PySetMethod("IPFP", "")
- script.PyInitMethod()
-
- return new_id
-
-# Draw the current median.
-def draw_Letter_graph(graph, savepath=''):
- import numpy as np
- import networkx as nx
- import matplotlib.pyplot as plt
- plt.figure()
- pos = {}
- for n in graph.nodes:
- pos[n] = np.array([float(graph.node[n]['attributes'][0]),
- float(graph.node[n]['attributes'][1])])
- nx.draw_networkx(graph, pos)
- if savepath != '':
- plt.savefig(savepath + str(time.time()) + '.eps', format='eps', dpi=300)
- plt.show()
- plt.clf()
-
-#compute new mappings
-def update_mappings(script,median_id,listID):
- med_distances = {}
- med_mappings = {}
- sod = 0
- for i in range(0,len(listID)):
- script.PyRunMethod(median_id,listID[i])
- med_distances[i] = script.PyGetUpperBound(median_id,listID[i])
- med_mappings[i] = script.PyGetForwardMap(median_id,listID[i])
- sod += med_distances[i]
- return med_distances, med_mappings, sod
-
-def calcul_Sij(all_mappings, all_graphs,i,j):
- s_ij = 0
- for k in range(0,len(all_mappings)):
- cur_graph = all_graphs[k]
- cur_mapping = all_mappings[k]
- size_graph = cur_graph.order()
- if ((cur_mapping[i] < size_graph) and
- (cur_mapping[j] < size_graph) and
- (cur_graph.has_edge(cur_mapping[i], cur_mapping[j]) == True)):
- s_ij += 1
-
- return s_ij
-
-# def update_median_nodes_L1(median,listIdSet,median_id,dataset, mappings):
-# from scipy.stats.mstats import gmean
-
-# for i in median.nodes():
-# for k in listIdSet:
-# vectors = [] #np.zeros((len(listIdSet),2))
-# if(k != median_id):
-# phi_i = mappings[k][i]
-# if(phi_i < dataset[k].order()):
-# vectors.append([float(dataset[k].node[phi_i]['x']),float(dataset[k].node[phi_i]['y'])])
-
-# new_labels = gmean(vectors)
-# median.node[i]['x'] = str(new_labels[0])
-# median.node[i]['y'] = str(new_labels[1])
-# return median
-
-def update_median_nodes(median,dataset,mappings):
- #update node attributes
- for i in median.nodes():
- nb_sub=0
- mean_label = {'x' : 0, 'y' : 0}
- for k in range(0,len(mappings)):
- phi_i = mappings[k][i]
- if ( phi_i < dataset[k].order() ):
- nb_sub += 1
- mean_label['x'] += 0.75*float(dataset[k].node[phi_i]['x'])
- mean_label['y'] += 0.75*float(dataset[k].node[phi_i]['y'])
- median.node[i]['x'] = str((1/0.75)*(mean_label['x']/nb_sub))
- median.node[i]['y'] = str((1/0.75)*(mean_label['y']/nb_sub))
- return median
-
-def update_median_edges(dataset, mappings, median, cei=0.425,cer=0.425):
-#for letter high, ceir = 1.7, alpha = 0.75
- size_dataset = len(dataset)
- ratio_cei_cer = cer/(cei + cer)
- threshold = size_dataset*ratio_cei_cer
- order_graph_median = median.order()
- for i in range(0,order_graph_median):
- for j in range(i+1,order_graph_median):
- s_ij = calcul_Sij(mappings,dataset,i,j)
- if(s_ij > threshold):
- median.add_edge(i,j)
- else:
- if(median.has_edge(i,j)):
- median.remove_edge(i,j)
- return median
-
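
A numeric sanity check of the edge rule in update_median_edges (values are illustrative): with the default cei = cer = 0.425 the ratio cer/(cei + cer) is 0.5, so for a dataset of 20 graphs the edge (i, j) is kept iff its images form an edge in more than 10 of the mappings.

    # Illustrative numbers; only the threshold formula comes from the code above.
    cei, cer, size_dataset = 0.425, 0.425, 20
    threshold = size_dataset * cer / (cei + cer)
    assert threshold == 10.0
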
-
-
-def compute_median(script, listID, dataset,verbose=False):
- """Compute a graph median of a dataset according to an environment
-
- Parameters
-
-    script : A gedlib-initialized environment
- listID (list): a list of ID in script: encodes the dataset
- dataset (list): corresponding graphs in networkX format. We assume that graph
- listID[i] corresponds to dataset[i]
-
- Returns:
- A networkX graph, which is the median, with corresponding sod
- """
- print(len(listID))
- median_set_index, median_set_sod = compute_median_set(script, listID)
- print(median_set_index)
- print(median_set_sod)
- sods = []
-    # Add the median to the environment.
- set_median = dataset[median_set_index].copy()
- median = dataset[median_set_index].copy()
- cur_med_id = replace_graph_in_env(script,median,-1)
- med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
- sods.append(cur_sod)
- if(verbose):
- print(cur_sod)
- ite_max = 50
- old_sod = cur_sod * 2
- ite = 0
- epsilon = 0.001
-
-    while((ite < ite_max) and (np.abs(old_sod - cur_sod) > epsilon)):
-        median = update_median_nodes(median, dataset, med_mappings)
-        median = update_median_edges(dataset, med_mappings, median)
-
-        cur_med_id = replace_graph_in_env(script, median, cur_med_id)
-        old_sod = cur_sod
-        med_distances, med_mappings, cur_sod = update_mappings(script, cur_med_id, listID)
-
-
- sods.append(cur_sod)
- if(verbose):
- print(cur_sod)
- ite += 1
- return median, cur_sod, sods, set_median
-
-
-
-def compute_median_set(script,listID):
- 'Returns the id in listID corresponding to median set'
-    # Compute the median set.
- N=len(listID)
- map_id_to_index = {}
- map_index_to_id = {}
- for i in range(0,len(listID)):
- map_id_to_index[listID[i]] = i
- map_index_to_id[i] = listID[i]
-
- distances = np.zeros((N,N))
- for i in listID:
- for j in listID:
- script.PyRunMethod(i,j)
- distances[map_id_to_index[i],map_id_to_index[j]] = script.PyGetUpperBound(i,j)
-
- median_set_index = np.argmin(np.sum(distances,0))
- sod = np.min(np.sum(distances,0))
-
- return median_set_index, sod
-
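
A toy illustration of the medoid selection in compute_median_set: the set median is the graph whose row of the pairwise distance matrix has the smallest sum.

    # Stand-alone sketch with a made-up 3x3 upper-bound GED matrix.
    import numpy as np
    distances = np.array([[0., 2., 4.],
                          [2., 0., 1.],
                          [4., 1., 0.]])
    assert np.argmin(np.sum(distances, 0)) == 1  # graph 1 has SOD 3, the smallest
    assert np.min(np.sum(distances, 0)) == 3.0
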
-if __name__ == "__main__":
-    # Load the dataset. 'script' is not defined in this module; the imported
-    # gedlibpy module provides the Py* API used below, so alias it (assumption).
-    script = gedlibpy
-    script.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml')
- script.PySetEditCost("LETTER")
- script.PyInitEnv()
- script.PySetMethod("IPFP", "")
- script.PyInitMethod()
-
- dataset,my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl")
-
- listID = script.PyGetAllGraphIds()
-    median, sod, sods, set_median = compute_median(script,listID,dataset,verbose=True)
-
- print(sod)
- draw_Letter_graph(median)
-
-
-#if __name__ == '__main__':
-# # test draw_Letter_graph
-# ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
-# 'extra_params': {}} # node nsymb
-# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# print(y_all)
-# for g in Gn:
-# draw_Letter_graph(g)
\ No newline at end of file
diff --git a/gklearn/preimage/median_benoit.py b/gklearn/preimage/median_benoit.py
deleted file mode 100644
index 6712196..0000000
--- a/gklearn/preimage/median_benoit.py
+++ /dev/null
@@ -1,201 +0,0 @@
-import sys
-import pathlib
-import numpy as np
-import networkx as nx
-
-import librariesImport
-import script
-sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
-import gklearn
-
-def replace_graph_in_env(script, graph, old_id, label='median'):
- """
- Replace a graph in script
-
-    If old_id is -1, add a new graph to the environment.
-
- """
- if(old_id > -1):
- script.PyClearGraph(old_id)
- new_id = script.PyAddGraph(label)
- for i in graph.nodes():
-        script.PyAddNode(new_id,str(i),graph.node[i]) # !! strings are required by gedlib
- for e in graph.edges:
- script.PyAddEdge(new_id, str(e[0]),str(e[1]), {})
- script.PyInitEnv()
- script.PySetMethod("IPFP", "")
- script.PyInitMethod()
-
- return new_id
-
-# Draw the current median.
-def draw_Letter_graph(graph):
- import numpy as np
- import networkx as nx
- import matplotlib.pyplot as plt
- plt.figure()
- pos = {}
- for n in graph.nodes:
- pos[n] = np.array([float(graph.node[n]['x']),float(graph.node[n]['y'])])
- nx.draw_networkx(graph,pos)
- plt.show()
-
-#compute new mappings
-def update_mappings(script,median_id,listID):
- med_distances = {}
- med_mappings = {}
- sod = 0
- for i in range(0,len(listID)):
- script.PyRunMethod(median_id,listID[i])
- med_distances[i] = script.PyGetUpperBound(median_id,listID[i])
- med_mappings[i] = script.PyGetForwardMap(median_id,listID[i])
- sod += med_distances[i]
- return med_distances, med_mappings, sod
-
-def calcul_Sij(all_mappings, all_graphs,i,j):
- s_ij = 0
- for k in range(0,len(all_mappings)):
- cur_graph = all_graphs[k]
- cur_mapping = all_mappings[k]
- size_graph = cur_graph.order()
- if ((cur_mapping[i] < size_graph) and
- (cur_mapping[j] < size_graph) and
- (cur_graph.has_edge(cur_mapping[i], cur_mapping[j]) == True)):
- s_ij += 1
-
- return s_ij
-
-# def update_median_nodes_L1(median,listIdSet,median_id,dataset, mappings):
-# from scipy.stats.mstats import gmean
-
-# for i in median.nodes():
-# for k in listIdSet:
-# vectors = [] #np.zeros((len(listIdSet),2))
-# if(k != median_id):
-# phi_i = mappings[k][i]
-# if(phi_i < dataset[k].order()):
-# vectors.append([float(dataset[k].node[phi_i]['x']),float(dataset[k].node[phi_i]['y'])])
-
-# new_labels = gmean(vectors)
-# median.node[i]['x'] = str(new_labels[0])
-# median.node[i]['y'] = str(new_labels[1])
-# return median
-
-def update_median_nodes(median,dataset,mappings):
- #update node attributes
- for i in median.nodes():
- nb_sub=0
- mean_label = {'x' : 0, 'y' : 0}
- for k in range(0,len(mappings)):
- phi_i = mappings[k][i]
- if ( phi_i < dataset[k].order() ):
- nb_sub += 1
- mean_label['x'] += 0.75*float(dataset[k].node[phi_i]['x'])
- mean_label['y'] += 0.75*float(dataset[k].node[phi_i]['y'])
- median.node[i]['x'] = str((1/0.75)*(mean_label['x']/nb_sub))
- median.node[i]['y'] = str((1/0.75)*(mean_label['y']/nb_sub))
- return median
-
-def update_median_edges(dataset, mappings, median, cei=0.425,cer=0.425):
-#for letter high, ceir = 1.7, alpha = 0.75
- size_dataset = len(dataset)
- ratio_cei_cer = cer/(cei + cer)
- threshold = size_dataset*ratio_cei_cer
- order_graph_median = median.order()
- for i in range(0,order_graph_median):
- for j in range(i+1,order_graph_median):
- s_ij = calcul_Sij(mappings,dataset,i,j)
- if(s_ij > threshold):
- median.add_edge(i,j)
- else:
- if(median.has_edge(i,j)):
- median.remove_edge(i,j)
- return median
-
-
-
-def compute_median(script, listID, dataset,verbose=False):
- """Compute a graph median of a dataset according to an environment
-
- Parameters
-
-    script : A gedlib-initialized environment
- listID (list): a list of ID in script: encodes the dataset
- dataset (list): corresponding graphs in networkX format. We assume that graph
- listID[i] corresponds to dataset[i]
-
- Returns:
- A networkX graph, which is the median, with corresponding sod
- """
- print(len(listID))
- median_set_index, median_set_sod = compute_median_set(script, listID)
- print(median_set_index)
- print(median_set_sod)
- sods = []
-    # Add the median to the environment.
- set_median = dataset[median_set_index].copy()
- median = dataset[median_set_index].copy()
- cur_med_id = replace_graph_in_env(script,median,-1)
- med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
- sods.append(cur_sod)
- if(verbose):
- print(cur_sod)
- ite_max = 50
- old_sod = cur_sod * 2
- ite = 0
- epsilon = 0.001
-
-    while((ite < ite_max) and (np.abs(old_sod - cur_sod) > epsilon)):
-        median = update_median_nodes(median, dataset, med_mappings)
-        median = update_median_edges(dataset, med_mappings, median)
-
-        cur_med_id = replace_graph_in_env(script, median, cur_med_id)
-        old_sod = cur_sod
-        med_distances, med_mappings, cur_sod = update_mappings(script, cur_med_id, listID)
-
-
- sods.append(cur_sod)
- if(verbose):
- print(cur_sod)
- ite += 1
- return median, cur_sod, sods, set_median
-
-
-
-def compute_median_set(script,listID):
- 'Returns the id in listID corresponding to median set'
-    # Compute the median set.
- N=len(listID)
- map_id_to_index = {}
- map_index_to_id = {}
- for i in range(0,len(listID)):
- map_id_to_index[listID[i]] = i
- map_index_to_id[i] = listID[i]
-
- distances = np.zeros((N,N))
- for i in listID:
- for j in listID:
- script.PyRunMethod(i,j)
- distances[map_id_to_index[i],map_id_to_index[j]] = script.PyGetUpperBound(i,j)
-
- median_set_index = np.argmin(np.sum(distances,0))
- sod = np.min(np.sum(distances,0))
-
- return median_set_index, sod
-
-if __name__ == "__main__":
-    # Load the dataset.
- script.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml')
- script.PySetEditCost("LETTER")
- script.PyInitEnv()
- script.PySetMethod("IPFP", "")
- script.PyInitMethod()
-
- dataset,my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl")
-
- listID = script.PyGetAllGraphIds()
-    median, sod, sods, set_median = compute_median(script,listID,dataset,verbose=True)
-
- print(sod)
- draw_Letter_graph(median)
diff --git a/gklearn/preimage/median_graph_estimator.py b/gklearn/preimage/median_graph_estimator.py
deleted file mode 100644
index b70cc61..0000000
--- a/gklearn/preimage/median_graph_estimator.py
+++ /dev/null
@@ -1,826 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Mar 16 18:04:55 2020
-
-@author: ljia
-"""
-import numpy as np
-from gklearn.preimage.common_types import AlgorithmState
-from gklearn.preimage import misc
-from gklearn.preimage.timer import Timer
-from gklearn.utils.utils import graph_isIdentical
-import time
-from tqdm import tqdm
-import sys
-import networkx as nx
-
-
-class MedianGraphEstimator(object):
-
- def __init__(self, ged_env, constant_node_costs):
- """Constructor.
-
- Parameters
- ----------
- ged_env : gklearn.gedlib.gedlibpy.GEDEnv
- Initialized GED environment. The edit costs must be set by the user.
-
- constant_node_costs : Boolean
- Set to True if the node relabeling costs are constant.
- """
- self.__ged_env = ged_env
- self.__init_method = 'BRANCH_FAST'
- self.__init_options = ''
- self.__descent_method = 'BRANCH_FAST'
- self.__descent_options = ''
- self.__refine_method = 'IPFP'
- self.__refine_options = ''
- self.__constant_node_costs = constant_node_costs
- self.__labeled_nodes = (ged_env.get_num_node_labels() > 1)
- self.__node_del_cost = ged_env.get_node_del_cost(ged_env.get_node_label(1))
- self.__node_ins_cost = ged_env.get_node_ins_cost(ged_env.get_node_label(1))
- self.__labeled_edges = (ged_env.get_num_edge_labels() > 1)
- self.__edge_del_cost = ged_env.get_edge_del_cost(ged_env.get_edge_label(1))
- self.__edge_ins_cost = ged_env.get_edge_ins_cost(ged_env.get_edge_label(1))
- self.__init_type = 'RANDOM'
- self.__num_random_inits = 10
- self.__desired_num_random_inits = 10
- self.__use_real_randomness = True
- self.__seed = 0
- self.__refine = True
- self.__time_limit_in_sec = 0
- self.__epsilon = 0.0001
- self.__max_itrs = 100
- self.__max_itrs_without_update = 3
- self.__num_inits_increase_order = 10
- self.__init_type_increase_order = 'K-MEANS++'
- self.__max_itrs_increase_order = 10
- self.__print_to_stdout = 2
- self.__median_id = np.inf # @todo: check
- self.__median_node_id_prefix = '' # @todo: check
- self.__node_maps_from_median = {}
- self.__sum_of_distances = 0
- self.__best_init_sum_of_distances = np.inf
- self.__converged_sum_of_distances = np.inf
- self.__runtime = None
- self.__runtime_initialized = None
- self.__runtime_converged = None
- self.__itrs = [] # @todo: check: {} ?
- self.__num_decrease_order = 0
- self.__num_increase_order = 0
- self.__num_converged_descents = 0
- self.__state = AlgorithmState.TERMINATED
-
- if ged_env is None:
- raise Exception('The GED environment pointer passed to the constructor of MedianGraphEstimator is null.')
- elif not ged_env.is_initialized():
- raise Exception('The GED environment is uninitialized. Call gedlibpy.GEDEnv.init() before passing it to the constructor of MedianGraphEstimator.')
-
-
- def set_options(self, options):
- """Sets the options of the estimator.
-
- Parameters
- ----------
- options : string
- String that specifies with which options to run the estimator.
- """
- self.__set_default_options()
- options_map = misc.options_string_to_options_map(options)
- for opt_name, opt_val in options_map.items():
- if opt_name == 'init-type':
- self.__init_type = opt_val
- if opt_val != 'MEDOID' and opt_val != 'RANDOM' and opt_val != 'MIN' and opt_val != 'MAX' and opt_val != 'MEAN':
-                    raise Exception('Invalid argument ' + opt_val + ' for option init-type. Usage: options = "[--init-type RANDOM|MEDOID|MIN|MAX|MEAN] [...]"')
- elif opt_name == 'random-inits':
- try:
- self.__num_random_inits = int(opt_val)
- self.__desired_num_random_inits = self.__num_random_inits
- except:
-                    raise Exception('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits <convertible to int greater 0>]"')
-
-                if self.__num_random_inits <= 0:
-                    raise Exception('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits <convertible to int greater 0>]"')
-
- elif opt_name == 'randomness':
- if opt_val == 'PSEUDO':
- self.__use_real_randomness = False
-
- elif opt_val == 'REAL':
- self.__use_real_randomness = True
-
- else:
- raise Exception('Invalid argument "' + opt_val + '" for option randomness. Usage: options = "[--randomness REAL|PSEUDO] [...]"')
-
- elif opt_name == 'stdout':
- if opt_val == '0':
- self.__print_to_stdout = 0
-
- elif opt_val == '1':
- self.__print_to_stdout = 1
-
- elif opt_val == '2':
- self.__print_to_stdout = 2
-
- else:
- raise Exception('Invalid argument "' + opt_val + '" for option stdout. Usage: options = "[--stdout 0|1|2] [...]"')
-
- elif opt_name == 'refine':
- if opt_val == 'TRUE':
- self.__refine = True
-
- elif opt_val == 'FALSE':
- self.__refine = False
-
- else:
- raise Exception('Invalid argument "' + opt_val + '" for option refine. Usage: options = "[--refine TRUE|FALSE] [...]"')
-
- elif opt_name == 'time-limit':
- try:
- self.__time_limit_in_sec = float(opt_val)
-
- except:
-                    raise Exception('Invalid argument "' + opt_val + '" for option time-limit. Usage: options = "[--time-limit <convertible to double>] [...]"')
-
- elif opt_name == 'max-itrs':
- try:
- self.__max_itrs = int(opt_val)
-
- except:
-                    raise Exception('Invalid argument "' + opt_val + '" for option max-itrs. Usage: options = "[--max-itrs <convertible to int>] [...]"')
-
- elif opt_name == 'max-itrs-without-update':
- try:
- self.__max_itrs_without_update = int(opt_val)
-
- except:
-                    raise Exception('Invalid argument "' + opt_val + '" for option max-itrs-without-update. Usage: options = "[--max-itrs-without-update <convertible to int>] [...]"')
-
- elif opt_name == 'seed':
- try:
- self.__seed = int(opt_val)
-
- except:
-                    raise Exception('Invalid argument "' + opt_val + '" for option seed. Usage: options = "[--seed <convertible to int greater equal 0>] [...]"')
-
- elif opt_name == 'epsilon':
- try:
- self.__epsilon = float(opt_val)
-
- except:
-                    raise Exception('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon <convertible to double greater 0>] [...]"')
-
-                if self.__epsilon <= 0:
-                    raise Exception('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon <convertible to double greater 0>] [...]"')
-
- elif opt_name == 'inits-increase-order':
- try:
- self.__num_inits_increase_order = int(opt_val)
-
- except:
-                    raise Exception('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order <convertible to int greater 0>]"')
-
-                if self.__num_inits_increase_order <= 0:
-                    raise Exception('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order <convertible to int greater 0>]"')
-
- elif opt_name == 'init-type-increase-order':
- self.__init_type_increase_order = opt_val
- if opt_val != 'CLUSTERS' and opt_val != 'K-MEANS++':
- raise Exception('Invalid argument ' + opt_val + ' for option init-type-increase-order. Usage: options = "[--init-type-increase-order CLUSTERS|K-MEANS++] [...]"')
-
- elif opt_name == 'max-itrs-increase-order':
- try:
- self.__max_itrs_increase_order = int(opt_val)
-
- except:
-                    raise Exception('Invalid argument "' + opt_val + '" for option max-itrs-increase-order. Usage: options = "[--max-itrs-increase-order <convertible to int>] [...]"')
-
- else:
-                valid_options = '[--init-type <arg>] [--random-inits <arg>] [--randomness <arg>] [--seed <arg>] [--stdout <arg>] '
-                valid_options += '[--time-limit <arg>] [--max-itrs <arg>] [--epsilon <arg>] '
-                valid_options += '[--inits-increase-order <arg>] [--init-type-increase-order <arg>] [--max-itrs-increase-order <arg>]'
- raise Exception('Invalid option "' + opt_name + '". Usage: options = "' + valid_options + '"')
-
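
A configuration sketch for set_options (assumes `ged_env` is an initialized gedlibpy GEDEnv with edit costs set, as the constructor requires):

    # Hypothetical setup; the option names are exactly those parsed above.
    mge = MedianGraphEstimator(ged_env, constant_node_costs=True)
    mge.set_options('--init-type MEDOID --random-inits 8 --seed 1 --stdout 0')
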
-
- def set_init_method(self, init_method, init_options=''):
- """Selects method to be used for computing the initial medoid graph.
-
- Parameters
- ----------
- init_method : string
-            The selected method. Default: ged::Options::GEDMethod::BRANCH_FAST.
-
- init_options : string
- The options for the selected method. Default: "".
-
- Notes
- -----
- Has no effect unless "--init-type MEDOID" is passed to set_options().
- """
-        self.__init_method = init_method
-        self.__init_options = init_options
-
-
- def set_descent_method(self, descent_method, descent_options=''):
-        """Selects method to be used for block gradient descent.
-
- Parameters
- ----------
- descent_method : string
- The selected method. Default: ged::Options::GEDMethod::BRANCH_FAST.
-
- descent_options : string
- The options for the selected method. Default: "".
-
-        Notes
-        -----
-        This is the method used for block gradient descent in run().
- """
-        self.__descent_method = descent_method
-        self.__descent_options = descent_options
-
-
- def set_refine_method(self, refine_method, refine_options):
- """Selects method to be used for improving the sum of distances and the node maps for the converged median.
-
- Parameters
- ----------
- refine_method : string
- The selected method. Default: "IPFP".
-
- refine_options : string
- The options for the selected method. Default: "".
-
- Notes
- -----
- Has no effect if "--refine FALSE" is passed to set_options().
- """
- self.__refine_method = refine_method
- self.__refine_options = refine_options
-
-
- def run(self, graph_ids, set_median_id, gen_median_id):
- """Computes a generalized median graph.
-
- Parameters
- ----------
- graph_ids : list[integer]
- The IDs of the graphs for which the median should be computed. Must have been added to the environment passed to the constructor.
-
- set_median_id : integer
- The ID of the computed set-median. A dummy graph with this ID must have been added to the environment passed to the constructor. Upon termination, the computed median can be obtained via gklearn.gedlib.gedlibpy.GEDEnv.get_graph().
-
-
- gen_median_id : integer
- The ID of the computed generalized median. Upon termination, the computed median can be obtained via gklearn.gedlib.gedlibpy.GEDEnv.get_graph().
- """
- # Sanity checks.
- if len(graph_ids) == 0:
- raise Exception('Empty vector of graph IDs, unable to compute median.')
- all_graphs_empty = True
- for graph_id in graph_ids:
- if self.__ged_env.get_graph_num_nodes(graph_id) > 0:
- self.__median_node_id_prefix = self.__ged_env.get_original_node_ids(graph_id)[0]
- all_graphs_empty = False
- break
- if all_graphs_empty:
- raise Exception('All graphs in the collection are empty.')
-
- # Start timer and record start time.
- start = time.time()
- timer = Timer(self.__time_limit_in_sec)
- self.__median_id = gen_median_id
- self.__state = AlgorithmState.TERMINATED
-
- # Get ExchangeGraph representations of the input graphs.
- graphs = {}
- for graph_id in graph_ids:
- # @todo: get_nx_graph() function may need to be modified according to the coming code.
- graphs[graph_id] = self.__ged_env.get_nx_graph(graph_id, True, True, False)
-# print(self.__ged_env.get_graph_internal_id(0))
-# print(graphs[0].graph)
-# print(graphs[0].nodes(data=True))
-# print(graphs[0].edges(data=True))
-# print(nx.adjacency_matrix(graphs[0]))
-
-
- # Construct initial medians.
- medians = []
- self.__construct_initial_medians(graph_ids, timer, medians)
- end_init = time.time()
- self.__runtime_initialized = end_init - start
-# print(medians[0].graph)
-# print(medians[0].nodes(data=True))
-# print(medians[0].edges(data=True))
-# print(nx.adjacency_matrix(medians[0]))
-
- # Reset information about iterations and number of times the median decreases and increases.
- self.__itrs = [0] * len(medians)
- self.__num_decrease_order = 0
- self.__num_increase_order = 0
- self.__num_converged_descents = 0
-
- # Initialize the best median.
- best_sum_of_distances = np.inf
- self.__best_init_sum_of_distances = np.inf
- node_maps_from_best_median = {}
-
- # Run block gradient descent from all initial medians.
- self.__ged_env.set_method(self.__descent_method, self.__descent_options)
- for median_pos in range(0, len(medians)):
-
- # Terminate if the timer has expired and at least one SOD has been computed.
- if timer.expired() and median_pos > 0:
- break
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('\n===========================================================')
- print('Block gradient descent for initial median', str(median_pos + 1), 'of', str(len(medians)), '.')
- print('-----------------------------------------------------------')
-
- # Get reference to the median.
- median = medians[median_pos]
-
- # Load initial median into the environment.
- self.__ged_env.load_nx_graph(median, gen_median_id)
- self.__ged_env.init(self.__ged_env.get_init_type())
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- progress = tqdm(desc='\rComputing initial node maps', total=len(graph_ids), file=sys.stdout)
-
- # Compute node maps and sum of distances for initial median.
- self.__sum_of_distances = 0
- self.__node_maps_from_median.clear() # @todo
- for graph_id in graph_ids:
- self.__ged_env.run_method(gen_median_id, graph_id)
- self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(gen_median_id, graph_id)
-# print(self.__node_maps_from_median[graph_id])
- self.__sum_of_distances += self.__ged_env.get_induced_cost(gen_median_id, graph_id) # @todo: the C++ implementation for this function in GedLibBind.ipp re-call get_node_map() once more, this is not neccessary.
-# print(self.__sum_of_distances)
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- progress.update(1)
-
- self.__best_init_sum_of_distances = min(self.__best_init_sum_of_distances, self.__sum_of_distances)
- self.__ged_env.load_nx_graph(median, set_median_id)
-# print(self.__best_init_sum_of_distances)
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('\n')
-
- # Run block gradient descent from initial median.
- converged = False
- itrs_without_update = 0
- while not self.__termination_criterion_met(converged, timer, self.__itrs[median_pos], itrs_without_update):
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('\n===========================================================')
- print('Iteration', str(self.__itrs[median_pos] + 1), 'for initial median', str(median_pos + 1), 'of', str(len(medians)), '.')
- print('-----------------------------------------------------------')
-
- # Initialize flags that tell us what happened in the iteration.
- median_modified = False
- node_maps_modified = False
- decreased_order = False
- increased_order = False
-
- # Update the median. # @todo!!!!!!!!!!!!!!!!!!!!!!
- median_modified = self.__update_median(graphs, median)
- if not median_modified or self.__itrs[median_pos] == 0:
- decreased_order = False
- if not decreased_order or self.__itrs[median_pos] == 0:
- increased_order = False
-
- # Update the number of iterations without update of the median.
- if median_modified or decreased_order or increased_order:
- itrs_without_update = 0
- else:
- itrs_without_update += 1
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('Loading median to environment: ... ', end='')
-
- # Load the median into the environment.
- # @todo: should this function use the original node label?
- self.__ged_env.load_nx_graph(median, gen_median_id)
- self.__ged_env.init(self.__ged_env.get_init_type())
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('done.')
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('Updating induced costs: ... ', end='')
-
- # Compute induced costs of the old node maps w.r.t. the updated median.
- for graph_id in graph_ids:
-# print(self.__ged_env.get_induced_cost(gen_median_id, graph_id))
- # @todo: watch out if compute_induced_cost is correct, this may influence: increase/decrease order, induced_cost() in the following code.!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- self.__ged_env.compute_induced_cost(gen_median_id, graph_id)
-# print('---------------------------------------')
-# print(self.__ged_env.get_induced_cost(gen_median_id, graph_id))
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('done.')
-
- # Update the node maps.
- node_maps_modified = self.__update_node_maps() # @todo
-
- # Update the order of the median if no improvement can be found with the current order.
-
- # Update the sum of distances.
- old_sum_of_distances = self.__sum_of_distances
- self.__sum_of_distances = 0
- for graph_id in self.__node_maps_from_median:
- self.__sum_of_distances += self.__ged_env.get_induced_cost(gen_median_id, graph_id) # @todo: see above.
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('Old local SOD: ', old_sum_of_distances)
- print('New local SOD: ', self.__sum_of_distances)
- print('Best converged SOD: ', best_sum_of_distances)
- print('Modified median: ', median_modified)
- print('Modified node maps: ', node_maps_modified)
- print('Decreased order: ', decreased_order)
- print('Increased order: ', increased_order)
- print('===========================================================\n')
-
- converged = not (median_modified or node_maps_modified or decreased_order or increased_order)
-
- self.__itrs[median_pos] += 1
-
- # Update the best median.
-            if self.__sum_of_distances < best_sum_of_distances:
- best_sum_of_distances = self.__sum_of_distances
- node_maps_from_best_median = self.__node_maps_from_median
- best_median = median
-
- # Update the number of converged descents.
- if converged:
- self.__num_converged_descents += 1
-
- # Store the best encountered median.
- self.__sum_of_distances = best_sum_of_distances
- self.__node_maps_from_median = node_maps_from_best_median
- self.__ged_env.load_nx_graph(best_median, gen_median_id)
- self.__ged_env.init(self.__ged_env.get_init_type())
- end_descent = time.time()
- self.__runtime_converged = end_descent - start
-
- # Refine the sum of distances and the node maps for the converged median.
- self.__converged_sum_of_distances = self.__sum_of_distances
- if self.__refine:
- self.__improve_sum_of_distances(timer) # @todo
-
- # Record end time, set runtime and reset the number of initial medians.
- end = time.time()
- self.__runtime = end - start
- self.__num_random_inits = self.__desired_num_random_inits
-
- # Print global information.
- if self.__print_to_stdout != 0:
- print('\n===========================================================')
- print('Finished computation of generalized median graph.')
- print('-----------------------------------------------------------')
- print('Best SOD after initialization: ', self.__best_init_sum_of_distances)
- print('Converged SOD: ', self.__converged_sum_of_distances)
- if self.__refine:
- print('Refined SOD: ', self.__sum_of_distances)
- print('Overall runtime: ', self.__runtime)
- print('Runtime of initialization: ', self.__runtime_initialized)
- print('Runtime of block gradient descent: ', self.__runtime_converged - self.__runtime_initialized)
- if self.__refine:
- print('Runtime of refinement: ', self.__runtime - self.__runtime_converged)
- print('Number of initial medians: ', len(medians))
- total_itr = 0
- num_started_descents = 0
- for itr in self.__itrs:
- total_itr += itr
- if itr > 0:
- num_started_descents += 1
- print('Size of graph collection: ', len(graph_ids))
- print('Number of started descents: ', num_started_descents)
- print('Number of converged descents: ', self.__num_converged_descents)
- print('Overall number of iterations: ', total_itr)
- print('Overall number of times the order decreased: ', self.__num_decrease_order)
- print('Overall number of times the order increased: ', self.__num_increase_order)
- print('===========================================================\n')
-
-
- def get_sum_of_distances(self, state=''):
- """Returns the sum of distances.
-
- Parameters
- ----------
- state : string
- The state of the estimator. Can be 'initialized' or 'converged'. Default: ""
-
- Returns
- -------
- float
- The sum of distances (SOD) of the median when the estimator was in the state `state` during the last call to run(). If `state` is not given, the converged SOD (without refinement) or refined SOD (with refinement) is returned.
- """
- if not self.__median_available():
- raise Exception('No median has been computed. Call run() before calling get_sum_of_distances().')
- if state == 'initialized':
- return self.__best_init_sum_of_distances
- if state == 'converged':
- return self.__converged_sum_of_distances
- return self.__sum_of_distances
-
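
An end-to-end sketch of driving the estimator (assumes `ged_env` already contains the collection plus two dummy graphs reserved for the medians; the IDs and the add_graph() calls are illustrative):

    graph_ids = [0, 1, 2]  # graphs previously added to ged_env
    set_median_id = ged_env.add_graph('set_median')
    gen_median_id = ged_env.add_graph('gen_median')
    mge.run(graph_ids, set_median_id, gen_median_id)
    print(mge.get_sum_of_distances())                # refined (or converged) SOD
    print(mge.get_sum_of_distances('initialized'))   # best SOD after initialization
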
-
- def __set_default_options(self):
- self.__init_type = 'RANDOM'
- self.__num_random_inits = 10
- self.__desired_num_random_inits = 10
- self.__use_real_randomness = True
- self.__seed = 0
- self.__refine = True
- self.__time_limit_in_sec = 0
- self.__epsilon = 0.0001
- self.__max_itrs = 100
- self.__max_itrs_without_update = 3
- self.__num_inits_increase_order = 10
- self.__init_type_increase_order = 'K-MEANS++'
- self.__max_itrs_increase_order = 10
- self.__print_to_stdout = 2
-
-
- def __construct_initial_medians(self, graph_ids, timer, initial_medians):
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('\n===========================================================')
- print('Constructing initial median(s).')
- print('-----------------------------------------------------------')
-
- # Compute or sample the initial median(s).
- initial_medians.clear()
- if self.__init_type == 'MEDOID':
- self.__compute_medoid(graph_ids, timer, initial_medians)
- elif self.__init_type == 'MAX':
- pass # @todo
-# compute_max_order_graph_(graph_ids, initial_medians)
- elif self.__init_type == 'MIN':
- pass # @todo
-# compute_min_order_graph_(graph_ids, initial_medians)
- elif self.__init_type == 'MEAN':
- pass # @todo
-# compute_mean_order_graph_(graph_ids, initial_medians)
- else:
- pass # @todo
-# sample_initial_medians_(graph_ids, initial_medians)
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('===========================================================')
-
-
- def __compute_medoid(self, graph_ids, timer, initial_medians):
- # Use method selected for initialization phase.
- self.__ged_env.set_method(self.__init_method, self.__init_options)
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- progress = tqdm(desc='\rComputing medoid', total=len(graph_ids), file=sys.stdout)
-
- # Compute the medoid.
- medoid_id = graph_ids[0]
- best_sum_of_distances = np.inf
- for g_id in graph_ids:
- if timer.expired():
- self.__state = AlgorithmState.CALLED
- break
- sum_of_distances = 0
- for h_id in graph_ids:
- self.__ged_env.run_method(g_id, h_id)
- sum_of_distances += self.__ged_env.get_upper_bound(g_id, h_id)
- if sum_of_distances < best_sum_of_distances:
- best_sum_of_distances = sum_of_distances
- medoid_id = g_id
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- progress.update(1)
- initial_medians.append(self.__ged_env.get_nx_graph(medoid_id, True, True, False)) # @todo
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('\n')
-
-
- def __termination_criterion_met(self, converged, timer, itr, itrs_without_update):
- if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False):
- if self.__state == AlgorithmState.TERMINATED:
- self.__state = AlgorithmState.INITIALIZED
- return True
- return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False)
-
-
- def __update_median(self, graphs, median):
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('Updating median: ', end='')
-
- # Store copy of the old median.
- old_median = median.copy() # @todo: this is just a shallow copy.
-
- # Update the node labels.
- if self.__labeled_nodes:
- self.__update_node_labels(graphs, median)
-
- # Update the edges and their labels.
- self.__update_edges(graphs, median)
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('done.')
-
- return not self.__are_graphs_equal(median, old_median)
-
-
- def __update_node_labels(self, graphs, median):
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('nodes ... ', end='')
-
- # Iterate through all nodes of the median.
- for i in range(0, nx.number_of_nodes(median)):
-# print('i: ', i)
- # Collect the labels of the substituted nodes.
- node_labels = []
- for graph_id, graph in graphs.items():
-# print('graph_id: ', graph_id)
-# print(self.__node_maps_from_median[graph_id])
- k = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], i)
-# print('k: ', k)
- if k != np.inf:
- node_labels.append(graph.nodes[k])
-
- # Compute the median label and update the median.
- if len(node_labels) > 0:
- median_label = self.__ged_env.get_median_node_label(node_labels)
- if self.__ged_env.get_node_rel_cost(median.nodes[i], median_label) > self.__epsilon:
- nx.set_node_attributes(median, {i: median_label})
-
-
- def __update_edges(self, graphs, median):
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('edges ... ', end='')
-
- # Clear the adjacency lists of the median and reset number of edges to 0.
- median_edges = list(median.edges)
- for (head, tail) in median_edges:
- median.remove_edge(head, tail)
-
- # @todo: what if edge is not labeled?
- # Iterate through all possible edges (i,j) of the median.
- for i in range(0, nx.number_of_nodes(median)):
- for j in range(i + 1, nx.number_of_nodes(median)):
-
- # Collect the labels of the edges to which (i,j) is mapped by the node maps.
- edge_labels = []
- for graph_id, graph in graphs.items():
- k = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], i)
- l = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], j)
- if k != np.inf and l != np.inf:
- if graph.has_edge(k, l):
- edge_labels.append(graph.edges[(k, l)])
-
- # Compute the median edge label and the overall edge relabeling cost.
- rel_cost = 0
- median_label = self.__ged_env.get_edge_label(1)
- if median.has_edge(i, j):
- median_label = median.edges[(i, j)]
- if self.__labeled_edges and len(edge_labels) > 0:
- new_median_label = self.__ged_env.median_edge_label(edge_labels)
- if self.__ged_env.get_edge_rel_cost(median_label, new_median_label) > self.__epsilon:
- median_label = new_median_label
- for edge_label in edge_labels:
- rel_cost += self.__ged_env.get_edge_rel_cost(median_label, edge_label)
-
- # Update the median.
- if rel_cost < (self.__edge_ins_cost + self.__edge_del_cost) * len(edge_labels) - self.__edge_del_cost * len(graphs):
- median.add_edge(i, j, **median_label)
- else:
- if median.has_edge(i, j):
- median.remove_edge(i, j)
-
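
A numeric sketch of the edge decision in __update_edges (hypothetical costs, not the library defaults): with unit edge insertion and deletion costs, 10 input graphs, and 7 collected edge labels, the median keeps edge (i, j) iff the total relabeling cost stays below 2 * 7 - 10 = 4.

    # Hypothetical costs; only the inequality mirrors the code above.
    edge_ins_cost = edge_del_cost = 1.0
    num_graphs, num_edge_labels = 10, 7
    bound = (edge_ins_cost + edge_del_cost) * num_edge_labels - edge_del_cost * num_graphs
    assert bound == 4.0
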
-
- def __update_node_maps(self):
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- progress = tqdm(desc='\rUpdating node maps', total=len(self.__node_maps_from_median), file=sys.stdout)
-
- # Update the node maps.
- node_maps_were_modified = False
- for graph_id in self.__node_maps_from_median:
- self.__ged_env.run_method(self.__median_id, graph_id)
- if self.__ged_env.get_upper_bound(self.__median_id, graph_id) < self.__ged_env.get_induced_cost(self.__median_id, graph_id) - self.__epsilon: # @todo: see above.
- self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__median_id, graph_id) # @todo: node_map may not assigned.
- node_maps_were_modified = True
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- progress.update(1)
-
- # Print information about current iteration.
- if self.__print_to_stdout == 2:
- print('\n')
-
- # Return true if the node maps were modified.
- return node_maps_were_modified
-
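-	# Added note: a recomputed node map is adopted only when the method's new
-	# upper bound undercuts the induced cost of the current map by more than
-	# `epsilon`, so each accepted update is intended to strictly decrease the
-	# sum of distances.
-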
-
-	def __improve_sum_of_distances(self, timer):
-		# Placeholder: the refinement of the sum of distances is not implemented yet.
-		pass
-
-
- def __median_available(self):
- return self.__median_id != np.inf
-
-
- def __get_node_image_from_map(self, node_map, node):
- """
-		Return the ID of the image of `node` under the node map `node_map`.
-
- Parameters
- ----------
-		node_map : list[tuple(int, int)]
-			The node map in which the image of `node` is looked up.
-
-		node : int
-			The node whose image is returned.
-
- Raises
- ------
- Exception
- If the node with ID `node` is not contained in the source nodes of the node map.
-
- Returns
- -------
-		int
-			ID of the image of `node`, or np.inf if `node` has no image (i.e., it is deleted).
-
- Notes
- -----
- This function is not implemented in the `ged::MedianGraphEstimator` class of the `GEDLIB` library. Instead it is a Python implementation of the `ged::NodeMap::image` function.
- """
- if node < len(node_map):
- return node_map[node][1] if node_map[node][1] < len(node_map) else np.inf
- else:
-			raise Exception('The node with ID ' + str(node) + ' is not contained in the source nodes of the node map.')
-
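-	# Hedged usage sketch, not in the original class: with
-	#     node_map = [(0, 1), (1, 0), (2, 5)]
-	# __get_node_image_from_map(node_map, 0) returns 1, while node 2 yields
-	# np.inf because its image 5 lies outside the map, i.e. the node is
-	# deleted rather than substituted.
-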
-
- def __are_graphs_equal(self, g1, g2):
- """
- Check if the two graphs are equal.
-
- Parameters
- ----------
- g1 : NetworkX graph object
- Graph 1 to be compared.
-
- g2 : NetworkX graph object
- Graph 2 to be compared.
-
- Returns
- -------
- bool
-			True if the two graphs are equal.
-
- Notes
- -----
-		This is not an identity check: the two graphs are considered equal if and only if their `original_node_ids`, node sets, all node labels, edge sets, and all edge labels are equal. This function is designed specifically for the class `MedianGraphEstimator` and should not be used elsewhere.
- """
-		# Check original node IDs.
-		if g1.graph['original_node_ids'] != g2.graph['original_node_ids']:
-			return False
-		# Check nodes.
-		if list(g1.nodes(data=True)) != list(g2.nodes(data=True)):
-			return False
-		# Check edges.
-		if list(g1.edges(data=True)) != list(g2.edges(data=True)):
-			return False
-
- return True
-
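-	# Added example: two graphs with identical topology but different node
-	# attribute values compare unequal here, e.g. node data {'x': '1'} versus
-	# {'x': '2'} on the same node already fails the node-list check.
-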
-
-	def compute_my_cost(g, h, node_map):
-		# Placeholder: every node currently contributes zero cost.
-		cost = 0.0
-		for node in g.nodes:
-			cost += 0
-		return cost
\ No newline at end of file
diff --git a/gklearn/preimage/median_linlin.py b/gklearn/preimage/median_linlin.py
deleted file mode 100644
index 6139558..0000000
--- a/gklearn/preimage/median_linlin.py
+++ /dev/null
@@ -1,215 +0,0 @@
-import sys
-import pathlib
-import numpy as np
-import networkx as nx
-
-from gedlibpy import librariesImport, gedlibpy
-sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
-import gklearn
-
-def replace_graph_in_env(script, graph, old_id, label='median'):
-	"""
-	Replace a graph in the environment of `script`.
-
-	If `old_id` is -1, add the graph to the environment as a new one.
-
-	"""
- if(old_id > -1):
- script.PyClearGraph(old_id)
- new_id = script.PyAddGraph(label)
- for i in graph.nodes():
-		script.PyAddNode(new_id, str(i), graph.node[i]) # !! strings are required by gedlib
- for e in graph.edges:
- script.PyAddEdge(new_id, str(e[0]),str(e[1]), {})
- script.PyInitEnv()
- script.PySetMethod("IPFP", "")
- script.PyInitMethod()
-
- return new_id
-
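-# Hedged usage sketch (mirrors how compute_median below drives the function):
-# the first call passes -1 to add the median as a new graph, later calls pass
-# the ID returned previously to overwrite it.
-#
-#     cur_med_id = replace_graph_in_env(script, median, -1)
-#     ...
-#     cur_med_id = replace_graph_in_env(script, median, cur_med_id)
-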
-# Draw the current median graph.
-def draw_Letter_graph(graph):
- import numpy as np
- import networkx as nx
- import matplotlib.pyplot as plt
- plt.figure()
- pos = {}
- for n in graph.nodes:
- pos[n] = np.array([float(graph.node[n]['x']),float(graph.node[n]['y'])])
- nx.draw_networkx(graph,pos)
- plt.show()
-
-#compute new mappings
-def update_mappings(script,median_id,listID):
- med_distances = {}
- med_mappings = {}
- sod = 0
- for i in range(0,len(listID)):
- script.PyRunMethod(median_id,listID[i])
- med_distances[i] = script.PyGetUpperBound(median_id,listID[i])
- med_mappings[i] = script.PyGetForwardMap(median_id,listID[i])
- sod += med_distances[i]
- return med_distances, med_mappings, sod
-
-def calcul_Sij(all_mappings, all_graphs,i,j):
- s_ij = 0
- for k in range(0,len(all_mappings)):
- cur_graph = all_graphs[k]
- cur_mapping = all_mappings[k]
- size_graph = cur_graph.order()
-		if (cur_mapping[i] < size_graph and
-				cur_mapping[j] < size_graph and
-				cur_graph.has_edge(cur_mapping[i], cur_mapping[j])):
- s_ij += 1
-
- return s_ij
-
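-# Added worked example: with three graphs whose mappings place the images of
-# i and j inside the graph and adjacent in graphs 0 and 2 but not in graph 1,
-# calcul_Sij(mappings, graphs, i, j) returns 2; it simply counts the graphs
-# supporting the candidate median edge (i, j).
-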
-# def update_median_nodes_L1(median,listIdSet,median_id,dataset, mappings):
-# from scipy.stats.mstats import gmean
-
-# for i in median.nodes():
-# for k in listIdSet:
-# vectors = [] #np.zeros((len(listIdSet),2))
-# if(k != median_id):
-# phi_i = mappings[k][i]
-# if(phi_i < dataset[k].order()):
-# vectors.append([float(dataset[k].node[phi_i]['x']),float(dataset[k].node[phi_i]['y'])])
-
-# new_labels = gmean(vectors)
-# median.node[i]['x'] = str(new_labels[0])
-# median.node[i]['y'] = str(new_labels[1])
-# return median
-
-def update_median_nodes(median,dataset,mappings):
- #update node attributes
- for i in median.nodes():
- nb_sub=0
- mean_label = {'x' : 0, 'y' : 0}
- for k in range(0,len(mappings)):
- phi_i = mappings[k][i]
- if ( phi_i < dataset[k].order() ):
- nb_sub += 1
- mean_label['x'] += 0.75*float(dataset[k].node[phi_i]['x'])
- mean_label['y'] += 0.75*float(dataset[k].node[phi_i]['y'])
- median.node[i]['x'] = str((1/0.75)*(mean_label['x']/nb_sub))
- median.node[i]['y'] = str((1/0.75)*(mean_label['y']/nb_sub))
- return median
-
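-# Added note: the 0.75 weight applied to each coordinate and the (1/0.75)
-# factor on the final division cancel exactly, so the update computes the
-# plain arithmetic mean of the mapped coordinates; also note that nb_sub == 0
-# (a node substituted in no graph) would raise a ZeroDivisionError here.
-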
-def update_median_edges(dataset, mappings, median, cei=0.425,cer=0.425):
-	# For Letter HIGH: ceir = 1.7, alpha = 0.75.
- size_dataset = len(dataset)
- ratio_cei_cer = cer/(cei + cer)
- threshold = size_dataset*ratio_cei_cer
- order_graph_median = median.order()
- for i in range(0,order_graph_median):
- for j in range(i+1,order_graph_median):
- s_ij = calcul_Sij(mappings,dataset,i,j)
- if(s_ij > threshold):
- median.add_edge(i,j)
- else:
- if(median.has_edge(i,j)):
- median.remove_edge(i,j)
- return median
-
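-# Added numeric example: with the defaults cei = cer = 0.425 the ratio
-# cer / (cei + cer) is 0.5, so for a dataset of 10 graphs the threshold is 5
-# and edge (i, j) is kept exactly when more than half of the graphs support
-# it through their mappings.
-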
-
-
-def compute_median(script, listID, dataset, verbose=False):
-	"""Compute a graph median of a dataset according to an environment.
-
-	Parameters
-	----------
-	script :
-		An initialized gedlib environment.
-	listID : list
-		A list of graph IDs in `script`, encoding the dataset.
-	dataset : list
-		The corresponding graphs in NetworkX format; graph listID[i] is
-		assumed to correspond to dataset[i].
-
-	Returns
-	-------
-	A NetworkX graph which is the median, together with the corresponding
-	SOD, the list of SODs per iteration, and the set median.
-	"""
-	if(verbose):
-		print(len(listID))
-	median_set_index, median_set_sod = compute_median_set(script, listID)
-	if(verbose):
-		print(median_set_index)
-		print(median_set_sod)
- sods = []
-	# Add the median to the environment.
- set_median = dataset[median_set_index].copy()
- median = dataset[median_set_index].copy()
- cur_med_id = replace_graph_in_env(script,median,-1)
- med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
- sods.append(cur_sod)
- if(verbose):
- print(cur_sod)
- ite_max = 50
- old_sod = cur_sod * 2
- ite = 0
- epsilon = 0.001
-
-	while((ite < ite_max) and (np.abs(old_sod - cur_sod) > epsilon)):
-		old_sod = cur_sod
-		median = update_median_nodes(median, dataset, med_mappings)
- median = update_median_edges(dataset,med_mappings,median)
-
- cur_med_id = replace_graph_in_env(script,median,cur_med_id)
- med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
-
-
- sods.append(cur_sod)
- if(verbose):
- print(cur_sod)
- ite += 1
- return median, cur_sod, sods, set_median
-
-
-
-def compute_median_set(script, listID):
-	"""Return the ID in `listID` corresponding to the set median."""
-	# Compute the set median.
- N=len(listID)
- map_id_to_index = {}
- map_index_to_id = {}
- for i in range(0,len(listID)):
- map_id_to_index[listID[i]] = i
- map_index_to_id[i] = listID[i]
-
- distances = np.zeros((N,N))
- for i in listID:
- for j in listID:
- script.PyRunMethod(i,j)
- distances[map_id_to_index[i],map_id_to_index[j]] = script.PyGetUpperBound(i,j)
-
- median_set_index = np.argmin(np.sum(distances,0))
- sod = np.min(np.sum(distances,0))
-
- return median_set_index, sod
-
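-# Hedged illustration, not in the original file: the set median is the column
-# argmin of the summed pairwise upper-bound matrix.
-def _set_median_sketch():
-	distances = np.array([[0., 2., 3.],
-						  [2., 0., 1.],
-						  [3., 1., 0.]])
-	# Column sums are [5., 3., 4.], so the set median is index 1 with SOD 3.0.
-	return np.argmin(np.sum(distances, 0)), np.min(np.sum(distances, 0))
-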
-def _convertGraph(G):
- """Convert a graph to the proper NetworkX format that can be
- recognized by library gedlibpy.
- """
- G_new = nx.Graph()
- for nd, attrs in G.nodes(data=True):
- G_new.add_node(str(nd), chem=attrs['atom'])
-# G_new.add_node(str(nd), x=str(attrs['attributes'][0]),
-# y=str(attrs['attributes'][1]))
- for nd1, nd2, attrs in G.edges(data=True):
- G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
-# G_new.add_edge(str(nd1), str(nd2))
-
- return G_new
-
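-# Hedged usage sketch, not in the original file, assuming MUTAG-style 'atom'
-# node attributes and 'bond_type' edge attributes:
-def _convert_graph_example():
-	G = nx.Graph()
-	G.add_node(0, atom='C')
-	G.add_node(1, atom='O')
-	G.add_edge(0, 1, bond_type='1')
-	return _convertGraph(G)  # nodes become '0', '1' with label key 'chem'
-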
-if __name__ == "__main__":
-	# Load the dataset.
- gedlibpy.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml')
- gedlibpy.PySetEditCost("LETTER")
- gedlibpy.PyInitEnv()
- gedlibpy.PySetMethod("IPFP", "")
- gedlibpy.PyInitMethod()
-
- dataset,my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl")
-
- listID = gedlibpy.PyGetAllGraphIds()
-	median, sod, sods, set_median = compute_median(gedlibpy, listID, dataset, verbose=True)
-
- print(sod)
- draw_Letter_graph(median)
diff --git a/gklearn/preimage/median_preimage_generator.py b/gklearn/preimage/median_preimage_generator.py
deleted file mode 100644
index dfbaef2..0000000
--- a/gklearn/preimage/median_preimage_generator.py
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Mar 26 18:27:22 2020
-
-@author: ljia
-"""
-from gklearn.preimage.preimage_generator import PreimageGenerator
-# from gklearn.utils.dataset import Dataset
-
-class MedianPreimageGenerator(PreimageGenerator):
-
- def __init__(self, mge, dataset):
- self.__mge = mge
- self.__dataset = dataset
\ No newline at end of file
diff --git a/gklearn/preimage/misc.py b/gklearn/preimage/misc.py
deleted file mode 100644
index 18682c8..0000000
--- a/gklearn/preimage/misc.py
+++ /dev/null
@@ -1,108 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Mar 19 18:13:56 2020
-
-@author: ljia
-"""
-
-def options_string_to_options_map(options_string):
- """Transforms an options string into an options map.
-
- Parameters
- ----------
- options_string : string
- Options string of the form "[--