#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 20 11:48:02 2020

@author: ljia

This script tests the influence of the ratio between node costs and edge
costs on the stability of the GED computation, where the base edit costs are
[1, 1, 1, 1, 1, 1].
"""

import os
import multiprocessing
import pickle
import logging
from gklearn.ged.util import compute_geds
import time
import sys
from group_results import group_trials


def generate_graphs():
    """Synthesize the graph set shared by all experiments of this script.

    Returns:
        Whatever ``GraphSynthesizer.unified_graphs`` returns when asked for
        100 undirected graphs with 20 nodes and 20 edges each, no node or
        edge labels, and ``seed=None``.
    """
    # Kept as a function-local import, as in the original script.
    from gklearn.utils.graph_synthesizer import GraphSynthesizer

    synthesizer = GraphSynthesizer()
    return synthesizer.unified_graphs(
        num_graphs=100, num_nodes=20, num_edges=20,
        num_node_labels=0, num_edge_labels=0,
        seed=None, directed=False)


def xp_compute_ged_matrix(graphs, N, repeats, ratio, trial):
    """Run a single trial of the GED computation and pickle its outputs.

    Reads the module-level ``save_dir``, which is only assigned in the
    ``__main__`` section of this script. Two files are written there:
    ``ged_matrix<suffix>.pkl`` and ``runtime<suffix>.pkl``.

    Args:
        graphs: Graphs handed to ``compute_geds``.
        N: Number of graphs; only used to build the output file names.
        repeats: Forwarded to ``compute_geds`` as ``repeats``.
        ratio: Factor applied to the first three edit cost constants
            (presumably the node costs, given the script description --
            confirm against the gklearn cost ordering).
        trial: Trial index; only used to build the output file names.

    Returns:
        ``(None, None)`` when the GED matrix file of this trial already
        exists (nothing is recomputed). Otherwise ``(ged_mat, runtime)``;
        if ``compute_geds`` raised, ``ged_mat`` is the string ``'error'``
        and ``runtime`` is ``0``, and those placeholder values are what gets
        pickled.
    """
    save_file_suffix = '.{}.repeats_{}.ratio_{:.2f}.trial_{}'.format(
        N, repeats, ratio, trial)
    ged_matrix_file = save_dir + 'ged_matrix' + save_file_suffix + '.pkl'
    runtime_file = save_dir + 'runtime' + save_file_suffix + '.pkl'

    # Skip trials whose result is already on disk.
    if os.path.isfile(ged_matrix_file):
        return None, None

    # 2. Set parameters.
    # Options for the GED computation.
    options = {
        'method': 'IPFP',  # use the IPFP heuristic.
        'initialization_method': 'RANDOM',  # or 'NODE', etc.
        # when bigger than 1, the method is considered mIPFP.
        'initial_solutions': 1,
        'edit_cost': 'CONSTANT',  # use CONSTANT cost.
        # the distance between non-symbolic node/edge labels is computed
        # by euclidean distance.
        'attr_distance': 'euclidean',
        'ratio_runs_from_initial_solutions': 1,
        # parallel threads. Per the original note, this does not work if
        # mpg_options['parallel'] = False.
        'threads': multiprocessing.cpu_count(),
        'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES',
        # First three constants are scaled by ``ratio``; the last three stay
        # at the base cost of 1.
        'edit_cost_constants': [1 * ratio, 1 * ratio, 1 * ratio, 1, 1, 1],
        'node_labels': [],
        'edge_labels': [],
        'node_attrs': [],
        'edge_attrs': [],
    }

    # 5. Compute GED matrix.
    # Placeholders that get stored if the computation fails.
    ged_mat = 'error'
    runtime = 0
    try:
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments during a long run.
        start = time.perf_counter()
        _, ged_mat, _ = compute_geds(
            graphs, options=options, repeats=repeats, parallel=True,
            verbose=True)
        runtime = time.perf_counter() - start
    except Exception as exp:
        # Deliberate best effort: log the failure and carry on with the
        # remaining trials instead of aborting the whole experiment.
        print('An exception occured when running this experiment:')
        LOG_FILENAME = save_dir + 'error.txt'
        # basicConfig only configures the root logger if it has no handlers
        # yet, so repeated calls after the first failure are no-ops.
        logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
        logging.exception(save_file_suffix)
        print(repr(exp))

    # 6. Get results.
    with open(ged_matrix_file, 'wb') as f:
        pickle.dump(ged_mat, f)
    with open(runtime_file, 'wb') as f:
        pickle.dump(runtime, f)

    return ged_mat, runtime


def save_trials_as_group(graphs, N, repeats, ratio):
    """Run trials 1..100 for one configuration, then group their files.

    Reads the module-level ``save_dir`` (assigned in the ``__main__``
    section). Does nothing if the grouped GED matrix file
    ``groups/ged_mats<name_middle>npy`` already exists.

    Args:
        graphs: Graphs handed to each trial.
        N: Number of graphs; used in file names.
        repeats: Number of repeats handed to each trial; used in file names.
        ratio: Cost ratio handed to each trial; used in file names.
    """
    name_middle = '.{}.repeats_{}.ratio_{:.2f}.'.format(N, repeats, ratio)

    # Return if the group file exists.
    if os.path.isfile(save_dir + 'groups/ged_mats' + name_middle + 'npy'):
        return

    # Each trial persists its own results to disk, so the return values do
    # not need to be kept in memory here.
    for trial in range(1, 101):
        print()
        print('Trial:', trial)
        xp_compute_ged_matrix(graphs, N, repeats, ratio, trial)

    # Group trials and remove single files (the meaning of the three
    # positional flags is defined by group_results.group_trials).
    for label in ('ged_matrix', 'runtime'):
        group_trials(save_dir, label + name_middle, True, True, False)


def results_for_a_ratio(ratio):
    """Run every (N, repeats) configuration for one cost ratio.

    Reads the module-level ``N_list`` and ``graphs`` assigned in the
    ``__main__`` section; the first ``N`` graphs are used for each ``N``.

    Args:
        ratio: Cost ratio forwarded to ``save_trials_as_group``.
    """
    for N in N_list:
        print()
        print('# of graphs:', N)
        for repeats in [1, 20, 40, 60, 80, 100]:
            print()
            print('Repeats:', repeats)
            save_trials_as_group(graphs[:N], N, repeats, ratio)


if __name__ == '__main__':
    # Graph counts may be given on the command line; otherwise use defaults.
    if len(sys.argv) > 1:
        N_list = [int(i) for i in sys.argv[1:]]
    else:
        N_list = [10, 50, 100]

    # Generate graphs.
    graphs = generate_graphs()

    # Output locations; the functions above read ``save_dir`` as a global.
    save_dir = 'outputs/edit_costs.repeats.N.IPFP/'
    os.makedirs(save_dir, exist_ok=True)
    os.makedirs(save_dir + 'groups/', exist_ok=True)

    for ratio in [10, 1, 0.1]:
        print()
        print('Ratio:', ratio)
        results_for_a_ratio(ratio)