diff --git a/.travis.yml b/.travis.yml index 9131ede..5ee8c16 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,7 @@ python: - '3.5' - '3.6' - '3.7' -- '3.8' +# - '3.8' before_install: - python --version - pip install -U pip @@ -15,7 +15,7 @@ before_install: install: - if [ $TRAVIS_PYTHON_VERSION == 3.8 ]; - then pip install -r requirements.txt; + then pip install -r gklearn/tests/requirements.txt; else pip install -r requirements.txt; fi - pip install wheel diff --git a/gklearn/preimage/test_k_closest_graphs.py b/gklearn/preimage/test_k_closest_graphs.py index e339859..152deab 100644 --- a/gklearn/preimage/test_k_closest_graphs.py +++ b/gklearn/preimage/test_k_closest_graphs.py @@ -28,7 +28,7 @@ from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance def fit_edit_cost_constants(fit_method, edit_cost_name, edit_cost_constants=None, initial_solutions=1, Gn_median=None, node_label=None, edge_label=None, - gkernel=None, dataset=None, + gkernel=None, dataset=None, init_ecc=None, Gn=None, Kmatrix_median=None): """fit edit cost constants. """ @@ -50,26 +50,32 @@ def fit_edit_cost_constants(fit_method, edit_cost_name, edit_cost_constants = random.sample(range(1, 10), 6) print('edit cost constants used:', edit_cost_constants) elif fit_method == 'expert': # expert - if edit_cost_name == 'LETTER': - edit_cost_constants = [0.9, 1.7, 0.75] - elif edit_cost_name == 'LETTER2': - edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425] + if init_ecc is None: + if edit_cost_name == 'LETTER': + edit_cost_constants = [0.9, 1.7, 0.75] + elif edit_cost_name == 'LETTER2': + edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425] + else: + edit_cost_constants = [3, 3, 1, 3, 3, 1] else: - edit_cost_constants = [3, 3, 1, 3, 3, 1] + edit_cost_constants = init_ecc elif fit_method == 'k-graphs': itr_max = 6 - if edit_cost_name == 'LETTER': - init_costs = [0.9, 1.7, 0.75] - elif edit_cost_name == 'LETTER2': - init_costs = [0.675, 0.675, 0.75, 0.425, 0.425] - elif edit_cost_name == 'NON_SYMBOLIC': - init_costs = [0, 0, 1, 1, 1, 0] - if Gn_median[0].graph['node_attrs'] == []: - init_costs[2] = 0 - if Gn_median[0].graph['edge_attrs'] == []: - init_costs[5] = 0 + if init_ecc is None: + if edit_cost_name == 'LETTER': + init_costs = [0.9, 1.7, 0.75] + elif edit_cost_name == 'LETTER2': + init_costs = [0.675, 0.675, 0.75, 0.425, 0.425] + elif edit_cost_name == 'NON_SYMBOLIC': + init_costs = [0, 0, 1, 1, 1, 0] + if Gn_median[0].graph['node_attrs'] == []: + init_costs[2] = 0 + if Gn_median[0].graph['edge_attrs'] == []: + init_costs[5] = 0 + else: + init_costs = [3, 3, 1, 3, 3, 1] else: - init_costs = [3, 3, 1, 3, 3, 1] + init_costs = init_ecc algo_options = '--threads 1 --initial-solutions ' \ + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1' params_ged = {'lib': 'gedlibpy', 'cost': edit_cost_name, 'method': 'IPFP', @@ -81,12 +87,15 @@ def fit_edit_cost_constants(fit_method, edit_cost_name, parallel=True) elif fit_method == 'whole-dataset': itr_max = 6 - if edit_cost_name == 'LETTER': - init_costs = [0.9, 1.7, 0.75] - elif edit_cost_name == 'LETTER2': - init_costs = [0.675, 0.675, 0.75, 0.425, 0.425] + if init_ecc is None: + if edit_cost_name == 'LETTER': + init_costs = [0.9, 1.7, 0.75] + elif edit_cost_name == 'LETTER2': + init_costs = [0.675, 0.675, 0.75, 0.425, 0.425] + else: + init_costs = [3, 3, 1, 3, 3, 1] else: - init_costs = [3, 3, 1, 3, 3, 1] + init_costs = init_ecc algo_options = '--threads 1 --initial-solutions ' \ + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1' params_ged = {'lib': 'gedlibpy', 'cost': edit_cost_name, 'method': 'IPFP', @@ -176,7 +185,7 @@ def compute_distances_to_true_median(Gn_median, fname_sm, fname_gm, def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_method, graph_dir=None, initial_solutions=1, edit_cost_constants=None, group_min=None, - dataset=None, edit_cost_name=None, + dataset=None, edit_cost_name=None, init_ecc=None, Kmatrix=None, parallel=True): # dataset = dataset.lower() @@ -210,7 +219,7 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho edit_cost_constants = fit_edit_cost_constants(fit_method, edit_cost_name, edit_cost_constants=edit_cost_constants, initial_solutions=initial_solutions, Gn_median=Gn_median, node_label=node_label, edge_label=edge_label, - gkernel=gkernel, dataset=dataset, + gkernel=gkernel, dataset=dataset, init_ecc=init_ecc, Gn=Gn, Kmatrix_median=Kmatrix_median) time_fitting = time.time() - time0 diff --git a/gklearn/preimage/xp_fit_method.py b/gklearn/preimage/xp_fit_method.py index 07f8b62..996108b 100644 --- a/gklearn/preimage/xp_fit_method.py +++ b/gklearn/preimage/xp_fit_method.py @@ -25,7 +25,31 @@ def get_dataset(ds_name): graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/' Gn, y_all = loadDataset(dataset, extra_params=graph_dir) for G in Gn: - reform_attributes(G) + reform_attributes(G, na_names=['x', 'y']) + G.graph['node_labels'] = [] + G.graph['edge_labels'] = [] + G.graph['node_attrs'] = ['x', 'y'] + G.graph['edge_attrs'] = [] + elif ds_name == 'Letter-med': # node non-symb + dataset = 'cpp_ext/data/collections/Letter.xml' + graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/MED/' + Gn, y_all = loadDataset(dataset, extra_params=graph_dir) + for G in Gn: + reform_attributes(G, na_names=['x', 'y']) + G.graph['node_labels'] = [] + G.graph['edge_labels'] = [] + G.graph['node_attrs'] = ['x', 'y'] + G.graph['edge_attrs'] = [] + elif ds_name == 'Letter-low': # node non-symb + dataset = 'cpp_ext/data/collections/Letter.xml' + graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/LOW/' + Gn, y_all = loadDataset(dataset, extra_params=graph_dir) + for G in Gn: + reform_attributes(G, na_names=['x', 'y']) + G.graph['node_labels'] = [] + G.graph['edge_labels'] = [] + G.graph['node_attrs'] = ['x', 'y'] + G.graph['edge_attrs'] = [] elif ds_name == 'Fingerprint': # dataset = 'cpp_ext/data/collections/Fingerprint.xml' # graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/' @@ -95,6 +119,7 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti ged_method = parameters['ged_method'] attr_distance = parameters['attr_distance'] fit_method = parameters['fit_method'] + init_ecc = parameters['init_ecc'] node_label = None edge_label = None @@ -165,6 +190,10 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti k = len(values) print('\n--------- k =', k, '----------') + if k < 2: + print('\nk = ', k, ', skip.\n') + continue + sod_sm_list = [] sod_gm_list = [] dis_k_sm_list = [] @@ -206,7 +235,7 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti gkernel, k, fit_method=fit_method, graph_dir=graph_dir, edit_cost_constants=None, group_min=median_set_idx_idx, dataset=ds_name, initial_solutions=initial_solutions, - edit_cost_name=edit_cost_name, + edit_cost_name=edit_cost_name, init_ecc=init_ecc, Kmatrix=Kmatrix_sub, parallel=False) sod_sm = res_sods[0] sod_gm = res_sods[1] @@ -292,7 +321,7 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='default') # plot median graphs. - if ds_name == 'Letter-high': + if ds_name == 'Letter-high' or ds_name == 'Letter-med' or ds_name == 'Letter-low': set_median = loadGXL(fn_pre_sm_new + '.gxl') gen_median = loadGXL(fn_pre_gm_new + '.gxl') draw_Letter_graph(set_median, fn_pre_sm_new) @@ -544,31 +573,221 @@ if __name__ == "__main__": # Kmatrix=Kmatrix) - #### xp 5: Fingerprint, sspkernel, using LETTER2. +# #### xp 5: Fingerprint, sspkernel, using LETTER2. +# # load dataset. +# print('getting dataset and computing kernel distance matrix first...') +# ds_name = 'Fingerprint' +# gkernel = 'structuralspkernel' +# Gn, y_all, graph_dir = get_dataset(ds_name) +# # remove graphs without nodes and edges. +# Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 0] +## and nx.number_of_edges(G) != 0)] +# idx = [G[0] for G in Gn] +# Gn = [G[1] for G in Gn] +# y_all = [y_all[i] for i in idx] +# y_idx = get_same_item_indices(y_all) +# # remove unused labels. +# for G in Gn: +# G.graph['edge_attrs'] = [] +# for edge in G.edges: +# del G.edges[edge]['attributes'] +# del G.edges[edge]['orient'] +# del G.edges[edge]['angle'] +# Gn = Gn[805:815] +# y_all = y_all[805:815] +# for G in Gn: +# G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' +# +# # compute/read Gram matrix and pair distances. +# Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered') +# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', +# Kmatrix=Kmatrix) +## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz') +## Kmatrix = gmfile['Kmatrix'] +## run_time = gmfile['run_time'] +## Kmatrix = Kmatrix[[0,1,2,3,4],:] +## Kmatrix = Kmatrix[:,[0,1,2,3,4]] +## print('\nTime to compute Gram matrix for the whole dataset: ', run_time) +# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, +# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) +## Kmatrix = np.zeros((len(Gn), len(Gn))) +## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 +# +# # compute pair distances. +## dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, +## Kmatrix=None, gkernel=gkernel, verbose=True) +## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 +# # fitting and computing. +# fit_methods = ['k-graphs', 'random', 'random', 'random'] +# for fit_method in fit_methods: +# print('\n-------------------------------------') +# print('fit method:', fit_method) +# parameters = {'ds_name': ds_name, +# 'gkernel': gkernel, +# 'edit_cost_name': 'LETTER2', +# 'ged_method': 'mIPFP', +# 'attr_distance': 'euclidean', +# 'fit_method': fit_method} +# xp_fit_method_for_non_symbolic(parameters, save_results=True, +# initial_solutions=40, +# Gn_data = [Gn, y_all, graph_dir], +# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean], +# Kmatrix=Kmatrix) + + +# #### xp 6: Letter-med, sspkernel. +# # load dataset. +# print('getting dataset and computing kernel distance matrix first...') +# ds_name = 'Letter-med' +# gkernel = 'structuralspkernel' +# Gn, y_all, graph_dir = get_dataset(ds_name) +## Gn = Gn[0:50] +## y_all = y_all[0:50] +# +# # compute/read Gram matrix and pair distances. +# Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered') +# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', +# Kmatrix=Kmatrix) +## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz') +## Kmatrix = gmfile['Kmatrix'] +## run_time = gmfile['run_time'] +## Kmatrix = Kmatrix[[0,1,2,3,4],:] +## Kmatrix = Kmatrix[:,[0,1,2,3,4]] +## print('\nTime to compute Gram matrix for the whole dataset: ', run_time) +# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, +# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) +## Kmatrix = np.zeros((len(Gn), len(Gn))) +## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 +# +# # fitting and computing. +# fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random'] +# for fit_method in fit_methods: +# print('\n-------------------------------------') +# print('fit method:', fit_method) +# parameters = {'ds_name': ds_name, +# 'gkernel': gkernel, +# 'edit_cost_name': 'LETTER2', +# 'ged_method': 'mIPFP', +# 'attr_distance': 'euclidean', +# 'fit_method': fit_method, +# 'init_ecc': [0.525, 0.525, 0.75, 0.475, 0.475]} +# print('parameters: ', parameters) +# xp_fit_method_for_non_symbolic(parameters, save_results=True, +# initial_solutions=40, +# Gn_data = [Gn, y_all, graph_dir], +# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean], +# Kmatrix=Kmatrix) + + +# #### xp 7: Letter-low, sspkernel. +# # load dataset. +# print('getting dataset and computing kernel distance matrix first...') +# ds_name = 'Letter-low' +# gkernel = 'structuralspkernel' +# Gn, y_all, graph_dir = get_dataset(ds_name) +## Gn = Gn[0:50] +## y_all = y_all[0:50] +# +# # compute/read Gram matrix and pair distances. +# Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered') +# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', +# Kmatrix=Kmatrix) +## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz') +## Kmatrix = gmfile['Kmatrix'] +## run_time = gmfile['run_time'] +## Kmatrix = Kmatrix[[0,1,2,3,4],:] +## Kmatrix = Kmatrix[:,[0,1,2,3,4]] +## print('\nTime to compute Gram matrix for the whole dataset: ', run_time) +# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, +# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) +## Kmatrix = np.zeros((len(Gn), len(Gn))) +## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 +# +# # fitting and computing. +# fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random'] +# for fit_method in fit_methods: +# print('\n-------------------------------------') +# print('fit method:', fit_method) +# parameters = {'ds_name': ds_name, +# 'gkernel': gkernel, +# 'edit_cost_name': 'LETTER2', +# 'ged_method': 'mIPFP', +# 'attr_distance': 'euclidean', +# 'fit_method': fit_method, +# 'init_ecc': [0.075, 0.075, 0.25, 0.075, 0.075]} +# print('parameters: ', parameters) +# xp_fit_method_for_non_symbolic(parameters, save_results=True, +# initial_solutions=40, +# Gn_data = [Gn, y_all, graph_dir], +# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean], +# Kmatrix=Kmatrix) + + +# #### xp 8: Letter-med, spkernel. +# # load dataset. +# print('getting dataset and computing kernel distance matrix first...') +# ds_name = 'Letter-med' +# gkernel = 'spkernel' +# Gn, y_all, graph_dir = get_dataset(ds_name) +# # remove graphs without nodes and edges. +# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0 +# and nx.number_of_edges(G) != 0)] +# idx = [G[0] for G in Gn] +# Gn = [G[1] for G in Gn] +# y_all = [y_all[i] for i in idx] +## Gn = Gn[0:50] +## y_all = y_all[0:50] +# +# # compute/read Gram matrix and pair distances. +# Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered') +# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', +# Kmatrix=Kmatrix) +## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz') +## Kmatrix = gmfile['Kmatrix'] +## run_time = gmfile['run_time'] +## Kmatrix = Kmatrix[[0,1,2,3,4],:] +## Kmatrix = Kmatrix[:,[0,1,2,3,4]] +## print('\nTime to compute Gram matrix for the whole dataset: ', run_time) +# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, +# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) +## Kmatrix = np.zeros((len(Gn), len(Gn))) +## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 +# +# # fitting and computing. +# fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random'] +# for fit_method in fit_methods: +# print('\n-------------------------------------') +# print('fit method:', fit_method) +# parameters = {'ds_name': ds_name, +# 'gkernel': gkernel, +# 'edit_cost_name': 'LETTER2', +# 'ged_method': 'mIPFP', +# 'attr_distance': 'euclidean', +# 'fit_method': fit_method, +# 'init_ecc': [0.525, 0.525, 0.75, 0.475, 0.475]} +# print('parameters: ', parameters) +# xp_fit_method_for_non_symbolic(parameters, save_results=True, +# initial_solutions=40, +# Gn_data = [Gn, y_all, graph_dir], +# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean], +# Kmatrix=Kmatrix) + + + #### xp 9: Letter-low, spkernel. # load dataset. print('getting dataset and computing kernel distance matrix first...') - ds_name = 'Fingerprint' - gkernel = 'structuralspkernel' + ds_name = 'Letter-low' + gkernel = 'spkernel' Gn, y_all, graph_dir = get_dataset(ds_name) # remove graphs without nodes and edges. - Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0)] -# and nx.number_of_edges(G) != 0)] + Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0 + and nx.number_of_edges(G) != 0)] idx = [G[0] for G in Gn] Gn = [G[1] for G in Gn] y_all = [y_all[i] for i in idx] - y_idx = get_same_item_indices(y_all) - # remove unused labels. - for G in Gn: - G.graph['edge_attrs'] = [] - for edge in G.edges: - del G.edges[edge]['attributes'] - del G.edges[edge]['orient'] - del G.edges[edge]['angle'] - Gn = Gn[805:815] - y_all = y_all[805:815] - for G in Gn: - G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' - +# Gn = Gn[0:50] +# y_all = y_all[0:50] + # compute/read Gram matrix and pair distances. Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered') np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', @@ -583,11 +802,7 @@ if __name__ == "__main__": Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) # Kmatrix = np.zeros((len(Gn), len(Gn))) # dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 - - # compute pair distances. -# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, -# Kmatrix=None, gkernel=gkernel, verbose=True) -# dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 + # fitting and computing. fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random'] for fit_method in fit_methods: @@ -598,7 +813,9 @@ if __name__ == "__main__": 'edit_cost_name': 'LETTER2', 'ged_method': 'mIPFP', 'attr_distance': 'euclidean', - 'fit_method': fit_method} + 'fit_method': fit_method, + 'init_ecc': [0.075, 0.075, 0.25, 0.075, 0.075]} + print('parameters: ', parameters) xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_solutions=40, Gn_data = [Gn, y_all, graph_dir],