From f337fc3fa917fd18222882bb2ca6978089173812 Mon Sep 17 00:00:00 2001
From: jajupmochi
Date: Tue, 14 Apr 2020 16:50:00 +0200
Subject: [PATCH] Add class WeisfeilerLehman.

---
 gklearn/kernels/__init__.py                        |    1 +
 gklearn/kernels/weisfeilerLehmanKernel.py          | 1026 ++++++++++----------
 gklearn/preimage/experiments/xp_median_preimage.py |  191 +++-
 gklearn/preimage/median_preimage_generator.py      |    8 +-
 gklearn/tests/test_graph_kernels.py                |   44 +-
 5 files changed, 734 insertions(+), 536 deletions(-)

diff --git a/gklearn/kernels/__init__.py b/gklearn/kernels/__init__.py
index 4afcc67..52df2b6 100644
--- a/gklearn/kernels/__init__.py
+++ b/gklearn/kernels/__init__.py
@@ -12,3 +12,4 @@ from gklearn.kernels.structural_sp import StructuralSP
 from gklearn.kernels.shortest_path import ShortestPath
 from gklearn.kernels.path_up_to_h import PathUpToH
 from gklearn.kernels.treelet import Treelet
+from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman
diff --git a/gklearn/kernels/weisfeilerLehmanKernel.py b/gklearn/kernels/weisfeilerLehmanKernel.py
index cb8c592..ecbcf49 100644
--- a/gklearn/kernels/weisfeilerLehmanKernel.py
+++ b/gklearn/kernels/weisfeilerLehmanKernel.py
@@ -3,9 +3,9 @@
 @references:
 
-    [1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM.
-    Weisfeiler-lehman graph kernels. Journal of Machine Learning Research.
-    2011;12(Sep):2539-61.
+	[1] Shervashidze N, Schweitzer P, van Leeuwen EJ, Mehlhorn K, Borgwardt KM.
+	Weisfeiler-Lehman graph kernels. Journal of Machine Learning Research.
+	2011;12(Sep):2539-61.
 """
 
 import sys
@@ -24,546 +24,546 @@ from gklearn.utils.parallel import parallel_gm
 
 # @todo: support edge kernel, sp kernel, user-defined kernel.
 def weisfeilerlehmankernel(*args,
-                        node_label='atom',
-                        edge_label='bond_type',
-                        height=0,
-                        base_kernel='subtree',
-                        parallel=None,
-                        n_jobs=None,
-                        verbose=True):
-    """Calculate Weisfeiler-Lehman kernels between graphs.
-
-    Parameters
-    ----------
-    Gn : List of NetworkX graph
-        List of graphs between which the kernels are calculated.
-
-    G1, G2 : NetworkX graphs
-        Two graphs between which the kernel is calculated.
-
-    node_label : string
-        Node attribute used as label. The default node label is atom.
-
-    edge_label : string
-        Edge attribute used as label. The default edge label is bond_type.
-
-    height : int
-        Subtree height.
-
-    base_kernel : string
-        Base kernel used in each iteration of WL kernel. Only default 'subtree'
-        kernel can be applied for now.
-
-    parallel : None
-        Which paralleliztion method is applied to compute the kernel. No
-        parallelization can be applied for now.
-
-    n_jobs : int
-        Number of jobs for parallelization. The default is to use all
-        computational cores. This argument is only valid when one of the
-        parallelization method is applied and can be ignored for now.
-
-    Return
-    ------
-    Kmatrix : Numpy matrix
-        Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
-
-    Notes
-    -----
-    This function now supports WL subtree kernel only.
-    """
-# The default base
-# kernel is subtree kernel. For user-defined kernel, base_kernel is the
-# name of the base kernel function used in each iteration of WL kernel.
-# This function returns a Numpy matrix, each element of which is the
-# user-defined Weisfeiler-Lehman kernel between 2 praphs.
-    # pre-process
-    base_kernel = base_kernel.lower()
-    Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list
-    Gn = [g.copy() for g in Gn]
-    ds_attrs = get_dataset_attributes(Gn, attr_names=['node_labeled'],
-                                      node_label=node_label)
-    if not ds_attrs['node_labeled']:
-        for G in Gn:
-            nx.set_node_attributes(G, '0', 'atom')
-
-    start_time = time.time()
-
-    # for WL subtree kernel
-    if base_kernel == 'subtree':
-        Kmatrix = _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, verbose)
-
-    # for WL shortest path kernel
-    elif base_kernel == 'sp':
-        Kmatrix = _wl_spkernel_do(Gn, node_label, edge_label, height)
-
-    # for WL edge kernel
-    elif base_kernel == 'edge':
-        Kmatrix = _wl_edgekernel_do(Gn, node_label, edge_label, height)
-
-    # for user defined base kernel
-    else:
-        Kmatrix = _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel)
-
-    run_time = time.time() - start_time
-    if verbose:
-        print("\n --- Weisfeiler-Lehman %s kernel matrix of size %d built in %s seconds ---"
-              % (base_kernel, len(args[0]), run_time))
-
-    return Kmatrix, run_time
+						node_label='atom',
+						edge_label='bond_type',
+						height=0,
+						base_kernel='subtree',
+						parallel=None,
+						n_jobs=None,
+						verbose=True):
+	"""Calculate Weisfeiler-Lehman kernels between graphs.
+
+	Parameters
+	----------
+	Gn : List of NetworkX graph
+		List of graphs between which the kernels are calculated.
+
+	G1, G2 : NetworkX graphs
+		Two graphs between which the kernel is calculated.
+
+	node_label : string
+		Node attribute used as label. The default node label is atom.
+
+	edge_label : string
+		Edge attribute used as label. The default edge label is bond_type.
+
+	height : int
+		Subtree height.
+
+	base_kernel : string
+		Base kernel used in each iteration of WL kernel. Only the default
+		'subtree' kernel can be applied for now.
+
+	parallel : None
+		Which parallelization method is applied to compute the kernel. No
+		parallelization can be applied for now.
+
+	n_jobs : int
+		Number of jobs for parallelization. The default is to use all
+		computational cores. This argument is only valid when one of the
+		parallelization methods is applied and can be ignored for now.
+
+	Return
+	------
+	Kmatrix : Numpy matrix
+		Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
+
+	Notes
+	-----
+	This function now supports WL subtree kernel only.
+	"""
+# The default base
+# kernel is subtree kernel. For user-defined kernel, base_kernel is the
+# name of the base kernel function used in each iteration of WL kernel.
+# This function returns a Numpy matrix, each element of which is the
+# user-defined Weisfeiler-Lehman kernel between 2 graphs.
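+# Sketch of the computation (cf. [1]): at iteration 0 the kernel simply
+# counts matching original node labels. At each subsequent iteration, every
+# node label is replaced by a compressed label derived from the node's own
+# label plus the sorted labels of its neighbors; the kernel value of two
+# graphs is the dot product of their label-count vectors, accumulated over
+# iterations 0 to `height`.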
+ # pre-process + base_kernel = base_kernel.lower() + Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list + Gn = [g.copy() for g in Gn] + ds_attrs = get_dataset_attributes(Gn, attr_names=['node_labeled'], + node_label=node_label) + if not ds_attrs['node_labeled']: + for G in Gn: + nx.set_node_attributes(G, '0', 'atom') + + start_time = time.time() + + # for WL subtree kernel + if base_kernel == 'subtree': + Kmatrix = _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, verbose) + + # for WL shortest path kernel + elif base_kernel == 'sp': + Kmatrix = _wl_spkernel_do(Gn, node_label, edge_label, height) + + # for WL edge kernel + elif base_kernel == 'edge': + Kmatrix = _wl_edgekernel_do(Gn, node_label, edge_label, height) + + # for user defined base kernel + else: + Kmatrix = _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel) + + run_time = time.time() - start_time + if verbose: + print("\n --- Weisfeiler-Lehman %s kernel matrix of size %d built in %s seconds ---" + % (base_kernel, len(args[0]), run_time)) + + return Kmatrix, run_time def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, verbose): - """Calculate Weisfeiler-Lehman kernels between graphs. - - Parameters - ---------- - Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. - node_label : string - node attribute used as label. - edge_label : string - edge attribute used as label. - height : int - wl height. - - Return - ------ - Kmatrix : Numpy matrix - Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. - """ - height = int(height) - Kmatrix = np.zeros((len(Gn), len(Gn))) - - # initial for height = 0 - all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration - - # for each graph - for G in Gn: - # get the set of original labels - labels_ori = list(nx.get_node_attributes(G, node_label).values()) - # number of occurence of each label in G - all_num_of_each_label.append(dict(Counter(labels_ori))) - - # calculate subtree kernel with the 0th iteration and add it to the final kernel - compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, False) - - # iterate each height - for h in range(1, height + 1): - all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration - num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs -# all_labels_ori = set() # all unique orignal labels in all graphs in this iteration - all_num_of_each_label = [] # number of occurence of each label in G - -# # for each graph -# # ---- use pool.imap_unordered to parallel and track progress. 
---- -# pool = Pool(n_jobs) -# itr = zip(Gn, range(0, len(Gn))) -# if len(Gn) < 100 * n_jobs: -# chunksize = int(len(Gn) / n_jobs) + 1 -# else: -# chunksize = 100 -# all_multisets_list = [[] for _ in range(len(Gn))] -## set_unique_list = [[] for _ in range(len(Gn))] -# get_partial = partial(wrapper_wl_iteration, node_label) -## if verbose: -## iterator = tqdm(pool.imap_unordered(get_partial, itr, chunksize), -## desc='wl iteration', file=sys.stdout) -## else: -# iterator = pool.imap_unordered(get_partial, itr, chunksize) -# for i, all_multisets in iterator: -# all_multisets_list[i] = all_multisets -## set_unique_list[i] = set_unique -## all_set_unique = all_set_unique | set(set_unique) -# pool.close() -# pool.join() - -# all_set_unique = set() -# for uset in all_multisets_list: -# all_set_unique = all_set_unique | set(uset) -# -# all_set_unique = list(all_set_unique) -## # a dictionary mapping original labels to new ones. -## set_compressed = {} -## for idx, uset in enumerate(all_set_unique): -## set_compressed.update({uset: idx}) -# -# for ig, G in enumerate(Gn): + """Calculate Weisfeiler-Lehman kernels between graphs. + + Parameters + ---------- + Gn : List of NetworkX graph + List of graphs between which the kernels are calculated. + node_label : string + node attribute used as label. + edge_label : string + edge attribute used as label. + height : int + wl height. + + Return + ------ + Kmatrix : Numpy matrix + Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. + """ + height = int(height) + Kmatrix = np.zeros((len(Gn), len(Gn))) + + # initial for height = 0 + all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration + + # for each graph + for G in Gn: + # get the set of original labels + labels_ori = list(nx.get_node_attributes(G, node_label).values()) + # number of occurence of each label in G + all_num_of_each_label.append(dict(Counter(labels_ori))) + + # calculate subtree kernel with the 0th iteration and add it to the final kernel + compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, False) + + # iterate each height + for h in range(1, height + 1): + all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration + num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs +# all_labels_ori = set() # all unique orignal labels in all graphs in this iteration + all_num_of_each_label = [] # number of occurence of each label in G + +# # for each graph +# # ---- use pool.imap_unordered to parallel and track progress. 
---- +# pool = Pool(n_jobs) +# itr = zip(Gn, range(0, len(Gn))) +# if len(Gn) < 100 * n_jobs: +# chunksize = int(len(Gn) / n_jobs) + 1 +# else: +# chunksize = 100 +# all_multisets_list = [[] for _ in range(len(Gn))] +## set_unique_list = [[] for _ in range(len(Gn))] +# get_partial = partial(wrapper_wl_iteration, node_label) +## if verbose: +## iterator = tqdm(pool.imap_unordered(get_partial, itr, chunksize), +## desc='wl iteration', file=sys.stdout) +## else: +# iterator = pool.imap_unordered(get_partial, itr, chunksize) +# for i, all_multisets in iterator: +# all_multisets_list[i] = all_multisets +## set_unique_list[i] = set_unique +## all_set_unique = all_set_unique | set(set_unique) +# pool.close() +# pool.join() + +# all_set_unique = set() +# for uset in all_multisets_list: +# all_set_unique = all_set_unique | set(uset) +# +# all_set_unique = list(all_set_unique) +## # a dictionary mapping original labels to new ones. +## set_compressed = {} +## for idx, uset in enumerate(all_set_unique): +## set_compressed.update({uset: idx}) +# +# for ig, G in enumerate(Gn): # -## # a dictionary mapping original labels to new ones. -## set_compressed = {} -## # if a label occured before, assign its former compressed label, -## # else assign the number of labels occured + 1 as the compressed label. -## for value in set_unique_list[i]: -## if uset in all_set_unique: -## set_compressed.update({uset: all_set_compressed[value]}) -## else: -## set_compressed.update({value: str(num_of_labels_occured + 1)}) -## num_of_labels_occured += 1 -# -## all_set_compressed.update(set_compressed) -# -# # relabel nodes -# for idx, node in enumerate(G.nodes()): -# G.nodes[node][node_label] = all_set_unique.index(all_multisets_list[ig][idx]) -# -# # get the set of compressed labels -# labels_comp = list(nx.get_node_attributes(G, node_label).values()) -## all_labels_ori.update(labels_comp) -# all_num_of_each_label[ig] = dict(Counter(labels_comp)) - - - - -# all_set_unique = list(all_set_unique) - - - # @todo: parallel this part. - for idx, G in enumerate(Gn): - - all_multisets = [] - for node, attrs in G.nodes(data=True): - # Multiset-label determination. - multiset = [G.nodes[neighbors][node_label] for neighbors in G[node]] - # sorting each multiset - multiset.sort() - multiset = [attrs[node_label]] + multiset # add the prefix - all_multisets.append(tuple(multiset)) - - # label compression - set_unique = list(set(all_multisets)) # set of unique multiset labels - # a dictionary mapping original labels to new ones. - set_compressed = {} - # if a label occured before, assign its former compressed label, - # else assign the number of labels occured + 1 as the compressed label. - for value in set_unique: - if value in all_set_compressed.keys(): - set_compressed.update({value: all_set_compressed[value]}) - else: - set_compressed.update({value: str(num_of_labels_occured + 1)}) - num_of_labels_occured += 1 - - all_set_compressed.update(set_compressed) - - # relabel nodes - for idx, node in enumerate(G.nodes()): - G.nodes[node][node_label] = set_compressed[all_multisets[idx]] - - # get the set of compressed labels - labels_comp = list(nx.get_node_attributes(G, node_label).values()) -# all_labels_ori.update(labels_comp) - all_num_of_each_label.append(dict(Counter(labels_comp))) - - # calculate subtree kernel with h iterations and add it to the final kernel - compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, False) - - return Kmatrix +## # a dictionary mapping original labels to new ones. 
+## set_compressed = {} +## # if a label occured before, assign its former compressed label, +## # else assign the number of labels occured + 1 as the compressed label. +## for value in set_unique_list[i]: +## if uset in all_set_unique: +## set_compressed.update({uset: all_set_compressed[value]}) +## else: +## set_compressed.update({value: str(num_of_labels_occured + 1)}) +## num_of_labels_occured += 1 +# +## all_set_compressed.update(set_compressed) +# +# # relabel nodes +# for idx, node in enumerate(G.nodes()): +# G.nodes[node][node_label] = all_set_unique.index(all_multisets_list[ig][idx]) +# +# # get the set of compressed labels +# labels_comp = list(nx.get_node_attributes(G, node_label).values()) +## all_labels_ori.update(labels_comp) +# all_num_of_each_label[ig] = dict(Counter(labels_comp)) + + + + +# all_set_unique = list(all_set_unique) + + + # @todo: parallel this part. + for idx, G in enumerate(Gn): + + all_multisets = [] + for node, attrs in G.nodes(data=True): + # Multiset-label determination. + multiset = [G.nodes[neighbors][node_label] for neighbors in G[node]] + # sorting each multiset + multiset.sort() + multiset = [attrs[node_label]] + multiset # add the prefix + all_multisets.append(tuple(multiset)) + + # label compression + set_unique = list(set(all_multisets)) # set of unique multiset labels + # a dictionary mapping original labels to new ones. + set_compressed = {} + # if a label occured before, assign its former compressed label, + # else assign the number of labels occured + 1 as the compressed label. + for value in set_unique: + if value in all_set_compressed.keys(): + set_compressed.update({value: all_set_compressed[value]}) + else: + set_compressed.update({value: str(num_of_labels_occured + 1)}) + num_of_labels_occured += 1 + + all_set_compressed.update(set_compressed) + + # relabel nodes + for idx, node in enumerate(G.nodes()): + G.nodes[node][node_label] = set_compressed[all_multisets[idx]] + + # get the set of compressed labels + labels_comp = list(nx.get_node_attributes(G, node_label).values()) +# all_labels_ori.update(labels_comp) + all_num_of_each_label.append(dict(Counter(labels_comp))) + + # calculate subtree kernel with h iterations and add it to the final kernel + compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, False) + + return Kmatrix def wl_iteration(G, node_label): - all_multisets = [] - for node, attrs in G.nodes(data=True): - # Multiset-label determination. - multiset = [G.nodes[neighbors][node_label] for neighbors in G[node]] - # sorting each multiset - multiset.sort() - multiset = [attrs[node_label]] + multiset # add the prefix - all_multisets.append(tuple(multiset)) -# # label compression -# set_unique = list(set(all_multisets)) # set of unique multiset labels - return all_multisets - -# # a dictionary mapping original labels to new ones. -# set_compressed = {} -# # if a label occured before, assign its former compressed label, -# # else assign the number of labels occured + 1 as the compressed label. -# for value in set_unique: -# if value in all_set_compressed.keys(): -# set_compressed.update({value: all_set_compressed[value]}) -# else: -# set_compressed.update({value: str(num_of_labels_occured + 1)}) -# num_of_labels_occured += 1 + all_multisets = [] + for node, attrs in G.nodes(data=True): + # Multiset-label determination. 
+ multiset = [G.nodes[neighbors][node_label] for neighbors in G[node]] + # sorting each multiset + multiset.sort() + multiset = [attrs[node_label]] + multiset # add the prefix + all_multisets.append(tuple(multiset)) +# # label compression +# set_unique = list(set(all_multisets)) # set of unique multiset labels + return all_multisets + +# # a dictionary mapping original labels to new ones. +# set_compressed = {} +# # if a label occured before, assign its former compressed label, +# # else assign the number of labels occured + 1 as the compressed label. +# for value in set_unique: +# if value in all_set_compressed.keys(): +# set_compressed.update({value: all_set_compressed[value]}) +# else: +# set_compressed.update({value: str(num_of_labels_occured + 1)}) +# num_of_labels_occured += 1 # -# all_set_compressed.update(set_compressed) +# all_set_compressed.update(set_compressed) # -# # relabel nodes -# for idx, node in enumerate(G.nodes()): -# G.nodes[node][node_label] = set_compressed[all_multisets[idx]] +# # relabel nodes +# for idx, node in enumerate(G.nodes()): +# G.nodes[node][node_label] = set_compressed[all_multisets[idx]] # -# # get the set of compressed labels -# labels_comp = list(nx.get_node_attributes(G, node_label).values()) -# all_labels_ori.update(labels_comp) -# all_num_of_each_label.append(dict(Counter(labels_comp))) -# return +# # get the set of compressed labels +# labels_comp = list(nx.get_node_attributes(G, node_label).values()) +# all_labels_ori.update(labels_comp) +# all_num_of_each_label.append(dict(Counter(labels_comp))) +# return def wrapper_wl_iteration(node_label, itr_item): - g = itr_item[0] - i = itr_item[1] - all_multisets = wl_iteration(g, node_label) - return i, all_multisets + g = itr_item[0] + i = itr_item[1] + all_multisets = wl_iteration(g, node_label) + return i, all_multisets def compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, verbose): - """Compute kernel matrix using the base kernel. - """ - if parallel == 'imap_unordered': - # compute kernels. - def init_worker(alllabels_toshare): - global G_alllabels - G_alllabels = alllabels_toshare - do_partial = partial(wrapper_compute_subtree_kernel, Kmatrix) - parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, - glbv=(all_num_of_each_label,), n_jobs=n_jobs, verbose=verbose) - elif parallel == None: - for i in range(len(Kmatrix)): - for j in range(i, len(Kmatrix)): - Kmatrix[i][j] = compute_subtree_kernel(all_num_of_each_label[i], - all_num_of_each_label[j], Kmatrix[i][j]) - Kmatrix[j][i] = Kmatrix[i][j] + """Compute kernel matrix using the base kernel. + """ + if parallel == 'imap_unordered': + # compute kernels. + def init_worker(alllabels_toshare): + global G_alllabels + G_alllabels = alllabels_toshare + do_partial = partial(wrapper_compute_subtree_kernel, Kmatrix) + parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, + glbv=(all_num_of_each_label,), n_jobs=n_jobs, verbose=verbose) + elif parallel == None: + for i in range(len(Kmatrix)): + for j in range(i, len(Kmatrix)): + Kmatrix[i][j] = compute_subtree_kernel(all_num_of_each_label[i], + all_num_of_each_label[j], Kmatrix[i][j]) + Kmatrix[j][i] = Kmatrix[i][j] def compute_subtree_kernel(num_of_each_label1, num_of_each_label2, kernel): - """Compute the subtree kernel. 
- """ - labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys())) - vector1 = np.array([(num_of_each_label1[label] - if (label in num_of_each_label1.keys()) else 0) - for label in labels]) - vector2 = np.array([(num_of_each_label2[label] - if (label in num_of_each_label2.keys()) else 0) - for label in labels]) - kernel += np.dot(vector1, vector2) - return kernel + """Compute the subtree kernel. + """ + labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys())) + vector1 = np.array([(num_of_each_label1[label] + if (label in num_of_each_label1.keys()) else 0) + for label in labels]) + vector2 = np.array([(num_of_each_label2[label] + if (label in num_of_each_label2.keys()) else 0) + for label in labels]) + kernel += np.dot(vector1, vector2) + return kernel def wrapper_compute_subtree_kernel(Kmatrix, itr): - i = itr[0] - j = itr[1] - return i, j, compute_subtree_kernel(G_alllabels[i], G_alllabels[j], Kmatrix[i][j]) - + i = itr[0] + j = itr[1] + return i, j, compute_subtree_kernel(G_alllabels[i], G_alllabels[j], Kmatrix[i][j]) + def _wl_spkernel_do(Gn, node_label, edge_label, height): - """Calculate Weisfeiler-Lehman shortest path kernels between graphs. - - Parameters - ---------- - Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. - node_label : string - node attribute used as label. - edge_label : string - edge attribute used as label. - height : int - subtree height. - - Return - ------ - Kmatrix : Numpy matrix - Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. - """ - pass - from gklearn.utils.utils import getSPGraph - - # init. - height = int(height) - Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel - - Gn = [ getSPGraph(G, edge_weight = edge_label) for G in Gn ] # get shortest path graphs of Gn - - # initial for height = 0 - for i in range(0, len(Gn)): - for j in range(i, len(Gn)): - for e1 in Gn[i].edges(data = True): - for e2 in Gn[j].edges(data = True): - if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): - Kmatrix[i][j] += 1 - Kmatrix[j][i] = Kmatrix[i][j] - - # iterate each height - for h in range(1, height + 1): - all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration - num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs - for G in Gn: # for each graph - set_multisets = [] - for node in G.nodes(data = True): - # Multiset-label determination. - multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ] - # sorting each multiset - multiset.sort() - multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix - set_multisets.append(multiset) - - # label compression - set_unique = list(set(set_multisets)) # set of unique multiset labels - # a dictionary mapping original labels to new ones. 
- set_compressed = {} - # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label - for value in set_unique: - if value in all_set_compressed.keys(): - set_compressed.update({ value : all_set_compressed[value] }) - else: - set_compressed.update({ value : str(num_of_labels_occured + 1) }) - num_of_labels_occured += 1 - - all_set_compressed.update(set_compressed) - - # relabel nodes - for node in G.nodes(data = True): - node[1][node_label] = set_compressed[set_multisets[node[0]]] - - # calculate subtree kernel with h iterations and add it to the final kernel - for i in range(0, len(Gn)): - for j in range(i, len(Gn)): - for e1 in Gn[i].edges(data = True): - for e2 in Gn[j].edges(data = True): - if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): - Kmatrix[i][j] += 1 - Kmatrix[j][i] = Kmatrix[i][j] - - return Kmatrix + """Calculate Weisfeiler-Lehman shortest path kernels between graphs. + + Parameters + ---------- + Gn : List of NetworkX graph + List of graphs between which the kernels are calculated. + node_label : string + node attribute used as label. + edge_label : string + edge attribute used as label. + height : int + subtree height. + + Return + ------ + Kmatrix : Numpy matrix + Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. + """ + pass + from gklearn.utils.utils import getSPGraph + + # init. + height = int(height) + Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel + + Gn = [ getSPGraph(G, edge_weight = edge_label) for G in Gn ] # get shortest path graphs of Gn + + # initial for height = 0 + for i in range(0, len(Gn)): + for j in range(i, len(Gn)): + for e1 in Gn[i].edges(data = True): + for e2 in Gn[j].edges(data = True): + if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): + Kmatrix[i][j] += 1 + Kmatrix[j][i] = Kmatrix[i][j] + + # iterate each height + for h in range(1, height + 1): + all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration + num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs + for G in Gn: # for each graph + set_multisets = [] + for node in G.nodes(data = True): + # Multiset-label determination. + multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ] + # sorting each multiset + multiset.sort() + multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix + set_multisets.append(multiset) + + # label compression + set_unique = list(set(set_multisets)) # set of unique multiset labels + # a dictionary mapping original labels to new ones. 
+ set_compressed = {} + # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label + for value in set_unique: + if value in all_set_compressed.keys(): + set_compressed.update({ value : all_set_compressed[value] }) + else: + set_compressed.update({ value : str(num_of_labels_occured + 1) }) + num_of_labels_occured += 1 + + all_set_compressed.update(set_compressed) + + # relabel nodes + for node in G.nodes(data = True): + node[1][node_label] = set_compressed[set_multisets[node[0]]] + + # calculate subtree kernel with h iterations and add it to the final kernel + for i in range(0, len(Gn)): + for j in range(i, len(Gn)): + for e1 in Gn[i].edges(data = True): + for e2 in Gn[j].edges(data = True): + if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): + Kmatrix[i][j] += 1 + Kmatrix[j][i] = Kmatrix[i][j] + + return Kmatrix def _wl_edgekernel_do(Gn, node_label, edge_label, height): - """Calculate Weisfeiler-Lehman edge kernels between graphs. - - Parameters - ---------- - Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. - node_label : string - node attribute used as label. - edge_label : string - edge attribute used as label. - height : int - subtree height. - - Return - ------ - Kmatrix : Numpy matrix - Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. - """ - pass - # init. - height = int(height) - Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel + """Calculate Weisfeiler-Lehman edge kernels between graphs. + + Parameters + ---------- + Gn : List of NetworkX graph + List of graphs between which the kernels are calculated. + node_label : string + node attribute used as label. + edge_label : string + edge attribute used as label. + height : int + subtree height. + + Return + ------ + Kmatrix : Numpy matrix + Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. + """ + pass + # init. + height = int(height) + Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel - # initial for height = 0 - for i in range(0, len(Gn)): - for j in range(i, len(Gn)): - for e1 in Gn[i].edges(data = True): - for e2 in Gn[j].edges(data = True): - if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): - Kmatrix[i][j] += 1 - Kmatrix[j][i] = Kmatrix[i][j] - - # iterate each height - for h in range(1, height + 1): - all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration - num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs - for G in Gn: # for each graph - set_multisets = [] - for node in G.nodes(data = True): - # Multiset-label determination. - multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ] - # sorting each multiset - multiset.sort() - multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix - set_multisets.append(multiset) - - # label compression - set_unique = list(set(set_multisets)) # set of unique multiset labels - # a dictionary mapping original labels to new ones. 
- set_compressed = {} - # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label - for value in set_unique: - if value in all_set_compressed.keys(): - set_compressed.update({ value : all_set_compressed[value] }) - else: - set_compressed.update({ value : str(num_of_labels_occured + 1) }) - num_of_labels_occured += 1 - - all_set_compressed.update(set_compressed) - - # relabel nodes - for node in G.nodes(data = True): - node[1][node_label] = set_compressed[set_multisets[node[0]]] - - # calculate subtree kernel with h iterations and add it to the final kernel - for i in range(0, len(Gn)): - for j in range(i, len(Gn)): - for e1 in Gn[i].edges(data = True): - for e2 in Gn[j].edges(data = True): - if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): - Kmatrix[i][j] += 1 - Kmatrix[j][i] = Kmatrix[i][j] - - return Kmatrix + # initial for height = 0 + for i in range(0, len(Gn)): + for j in range(i, len(Gn)): + for e1 in Gn[i].edges(data = True): + for e2 in Gn[j].edges(data = True): + if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): + Kmatrix[i][j] += 1 + Kmatrix[j][i] = Kmatrix[i][j] + + # iterate each height + for h in range(1, height + 1): + all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration + num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs + for G in Gn: # for each graph + set_multisets = [] + for node in G.nodes(data = True): + # Multiset-label determination. + multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ] + # sorting each multiset + multiset.sort() + multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix + set_multisets.append(multiset) + + # label compression + set_unique = list(set(set_multisets)) # set of unique multiset labels + # a dictionary mapping original labels to new ones. + set_compressed = {} + # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label + for value in set_unique: + if value in all_set_compressed.keys(): + set_compressed.update({ value : all_set_compressed[value] }) + else: + set_compressed.update({ value : str(num_of_labels_occured + 1) }) + num_of_labels_occured += 1 + + all_set_compressed.update(set_compressed) + + # relabel nodes + for node in G.nodes(data = True): + node[1][node_label] = set_compressed[set_multisets[node[0]]] + + # calculate subtree kernel with h iterations and add it to the final kernel + for i in range(0, len(Gn)): + for j in range(i, len(Gn)): + for e1 in Gn[i].edges(data = True): + for e2 in Gn[j].edges(data = True): + if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): + Kmatrix[i][j] += 1 + Kmatrix[j][i] = Kmatrix[i][j] + + return Kmatrix def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel): - """Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs. - - Parameters - ---------- - Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. - node_label : string - node attribute used as label. - edge_label : string - edge attribute used as label. - height : int - subtree height. 
- base_kernel : string - Name of the base kernel function used in each iteration of WL kernel. This function returns a Numpy matrix, each element of which is the user-defined Weisfeiler-Lehman kernel between 2 praphs. - - Return - ------ - Kmatrix : Numpy matrix - Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. - """ - pass - # init. - height = int(height) - Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel + """Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs. + + Parameters + ---------- + Gn : List of NetworkX graph + List of graphs between which the kernels are calculated. + node_label : string + node attribute used as label. + edge_label : string + edge attribute used as label. + height : int + subtree height. + base_kernel : string + Name of the base kernel function used in each iteration of WL kernel. This function returns a Numpy matrix, each element of which is the user-defined Weisfeiler-Lehman kernel between 2 praphs. + + Return + ------ + Kmatrix : Numpy matrix + Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. + """ + pass + # init. + height = int(height) + Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel - # initial for height = 0 - Kmatrix = base_kernel(Gn, node_label, edge_label) - - # iterate each height - for h in range(1, height + 1): - all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration - num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs - for G in Gn: # for each graph - set_multisets = [] - for node in G.nodes(data = True): - # Multiset-label determination. - multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ] - # sorting each multiset - multiset.sort() - multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix - set_multisets.append(multiset) - - # label compression - set_unique = list(set(set_multisets)) # set of unique multiset labels - # a dictionary mapping original labels to new ones. - set_compressed = {} - # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label - for value in set_unique: - if value in all_set_compressed.keys(): - set_compressed.update({ value : all_set_compressed[value] }) - else: - set_compressed.update({ value : str(num_of_labels_occured + 1) }) - num_of_labels_occured += 1 - - all_set_compressed.update(set_compressed) - - # relabel nodes - for node in G.nodes(data = True): - node[1][node_label] = set_compressed[set_multisets[node[0]]] - - # calculate kernel with h iterations and add it to the final kernel - Kmatrix += base_kernel(Gn, node_label, edge_label) - - return Kmatrix + # initial for height = 0 + Kmatrix = base_kernel(Gn, node_label, edge_label) + + # iterate each height + for h in range(1, height + 1): + all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration + num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs + for G in Gn: # for each graph + set_multisets = [] + for node in G.nodes(data = True): + # Multiset-label determination. 
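+				# NOTE: `G.node` below is the pre-2.0 NetworkX node accessor
+				# (removed in NetworkX 2.4); current releases spell it `G.nodes`.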
+ multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ] + # sorting each multiset + multiset.sort() + multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix + set_multisets.append(multiset) + + # label compression + set_unique = list(set(set_multisets)) # set of unique multiset labels + # a dictionary mapping original labels to new ones. + set_compressed = {} + # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label + for value in set_unique: + if value in all_set_compressed.keys(): + set_compressed.update({ value : all_set_compressed[value] }) + else: + set_compressed.update({ value : str(num_of_labels_occured + 1) }) + num_of_labels_occured += 1 + + all_set_compressed.update(set_compressed) + + # relabel nodes + for node in G.nodes(data = True): + node[1][node_label] = set_compressed[set_multisets[node[0]]] + + # calculate kernel with h iterations and add it to the final kernel + Kmatrix += base_kernel(Gn, node_label, edge_label) + + return Kmatrix diff --git a/gklearn/preimage/experiments/xp_median_preimage.py b/gklearn/preimage/experiments/xp_median_preimage.py index f3206bc..903faaa 100644 --- a/gklearn/preimage/experiments/xp_median_preimage.py +++ b/gklearn/preimage/experiments/xp_median_preimage.py @@ -196,6 +196,66 @@ def xp_median_preimage_9_3(): print('fit method:', fit_method, '\n') mpg_options['fit_method'] = fit_method generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + + +def xp_median_preimage_9_4(): + """xp 9_4: MAO, WeisfeilerLehman, using CONSTANT. + """ + # set parameters. + ds_name = 'MAO' # + mpg_options = {'fit_method': 'k-graphs', + 'init_ecc': [4, 4, 2, 1, 1, 1], # + 'ds_name': ds_name, + 'parallel': True, # False + 'time_limit_in_sec': 0, + 'max_itrs': 100, # + 'max_itrs_without_update': 3, + 'epsilon_residual': 0.01, + 'epsilon_ec': 0.1, + 'verbose': 2} + kernel_options = {'name': 'WeisfeilerLehman', + 'height': 6, + 'base_kernel': 'subtree', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 2} + ged_options = {'method': 'IPFP', + 'initialization_method': 'RANDOM', # 'NODE' + 'initial_solutions': 10, # 1 + 'edit_cost': 'CONSTANT', # + 'attr_distance': 'euclidean', + 'ratio_runs_from_initial_solutions': 1, + 'threads': multiprocessing.cpu_count(), + 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'} + mge_options = {'init_type': 'MEDOID', + 'random_inits': 10, + 'time_limit': 600, + 'verbose': 2, + 'refine': False} + save_results = True + dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/' + irrelevant_labels = None # + edge_required = False # + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('mpg_options:', mpg_options) + print('kernel_options:', kernel_options) + print('ged_options:', ged_options) + print('mge_options:', mge_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. 
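+	# Each deterministic fit method ('k-graphs', 'expert') runs once; the
+	# 'random' method is repeated five times.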
+ for fit_method in ['k-graphs', 'expert'] + ['random'] * 5: + print('\n-------------------------------------') + print('fit method:', fit_method, '\n') + mpg_options['fit_method'] = fit_method + generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) def xp_median_preimage_8_1(): @@ -383,6 +443,66 @@ def xp_median_preimage_8_3(): mpg_options['fit_method'] = fit_method generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + +def xp_median_preimage_8_4(): + """xp 8_4: Monoterpenoides, WeisfeilerLehman, using CONSTANT. + """ + # set parameters. + ds_name = 'Monoterpenoides' # + mpg_options = {'fit_method': 'k-graphs', + 'init_ecc': [4, 4, 2, 1, 1, 1], # + 'ds_name': ds_name, + 'parallel': True, # False + 'time_limit_in_sec': 0, + 'max_itrs': 100, # + 'max_itrs_without_update': 3, + 'epsilon_residual': 0.01, + 'epsilon_ec': 0.1, + 'verbose': 2} + kernel_options = {'name': 'WeisfeilerLehman', + 'height': 4, + 'base_kernel': 'subtree', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 2} + ged_options = {'method': 'IPFP', + 'initialization_method': 'RANDOM', # 'NODE' + 'initial_solutions': 10, # 1 + 'edit_cost': 'CONSTANT', # + 'attr_distance': 'euclidean', + 'ratio_runs_from_initial_solutions': 1, + 'threads': multiprocessing.cpu_count(), + 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'} + mge_options = {'init_type': 'MEDOID', + 'random_inits': 10, + 'time_limit': 600, + 'verbose': 2, + 'refine': False} + save_results = True + dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/' + irrelevant_labels = None # + edge_required = False # + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('mpg_options:', mpg_options) + print('kernel_options:', kernel_options) + print('ged_options:', ged_options) + print('mge_options:', mge_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. + for fit_method in ['k-graphs', 'expert'] + ['random'] * 5: + print('\n-------------------------------------') + print('fit method:', fit_method, '\n') + mpg_options['fit_method'] = fit_method + generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + def xp_median_preimage_7_1(): """xp 7_1: MUTAG, StructuralSP, using CONSTANT. @@ -568,6 +688,66 @@ def xp_median_preimage_7_3(): print('fit method:', fit_method, '\n') mpg_options['fit_method'] = fit_method generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + + +def xp_median_preimage_7_4(): + """xp 7_4: MUTAG, WeisfeilerLehman, using CONSTANT. + """ + # set parameters. 
+ ds_name = 'MUTAG' # + mpg_options = {'fit_method': 'k-graphs', + 'init_ecc': [4, 4, 2, 1, 1, 1], # + 'ds_name': ds_name, + 'parallel': True, # False + 'time_limit_in_sec': 0, + 'max_itrs': 100, # + 'max_itrs_without_update': 3, + 'epsilon_residual': 0.01, + 'epsilon_ec': 0.1, + 'verbose': 2} + kernel_options = {'name': 'WeisfeilerLehman', + 'height': 1, + 'base_kernel': 'subtree', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 2} + ged_options = {'method': 'IPFP', + 'initialization_method': 'RANDOM', # 'NODE' + 'initial_solutions': 10, # 1 + 'edit_cost': 'CONSTANT', # + 'attr_distance': 'euclidean', + 'ratio_runs_from_initial_solutions': 1, + 'threads': multiprocessing.cpu_count(), + 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'} + mge_options = {'init_type': 'MEDOID', + 'random_inits': 10, + 'time_limit': 600, + 'verbose': 2, + 'refine': False} + save_results = True + dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/' + irrelevant_labels = None # + edge_required = False # + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('mpg_options:', mpg_options) + print('kernel_options:', kernel_options) + print('ged_options:', ged_options) + print('mge_options:', mge_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. + for fit_method in ['k-graphs', 'expert'] + ['random'] * 5: + print('\n-------------------------------------') + print('fit method:', fit_method, '\n') + mpg_options['fit_method'] = fit_method + generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) def xp_median_preimage_6_1(): @@ -1432,6 +1612,9 @@ if __name__ == "__main__": #### xp 7_3: MUTAG, Treelet, using CONSTANT. # xp_median_preimage_7_3() + + #### xp 7_4: MUTAG, WeisfeilerLehman, using CONSTANT. +# xp_median_preimage_7_4() #### xp 8_1: Monoterpenoides, StructuralSP, using CONSTANT. # xp_median_preimage_8_1() @@ -1442,6 +1625,9 @@ if __name__ == "__main__": #### xp 8_3: Monoterpenoides, Treelet, using CONSTANT. # xp_median_preimage_8_3() + #### xp 8_4: Monoterpenoides, WeisfeilerLehman, using CONSTANT. +# xp_median_preimage_8_4() + #### xp 9_1: MAO, StructuralSP, using CONSTANT, symbolic only. # xp_median_preimage_9_1() @@ -1449,4 +1635,7 @@ if __name__ == "__main__": # xp_median_preimage_9_2() #### xp 9_3: MAO, Treelet, using CONSTANT. - xp_median_preimage_9_3() \ No newline at end of file +# xp_median_preimage_9_3() + + #### xp 9_4: MAO, WeisfeilerLehman, using CONSTANT. 
+ xp_median_preimage_9_4() \ No newline at end of file diff --git a/gklearn/preimage/median_preimage_generator.py b/gklearn/preimage/median_preimage_generator.py index 5f00641..f342465 100644 --- a/gklearn/preimage/median_preimage_generator.py +++ b/gklearn/preimage/median_preimage_generator.py @@ -751,8 +751,14 @@ class MedianPreimageGenerator(PreimageGenerator): edge_labels=self._dataset.edge_labels, ds_infos=self._dataset.get_dataset_infos(keys=['directed']), **self._kernel_options) + elif self._kernel_options['name'] == 'WeisfeilerLehman': + from gklearn.kernels import WeisfeilerLehman + self._graph_kernel = WeisfeilerLehman(node_labels=self._dataset.node_labels, + edge_labels=self._dataset.edge_labels, + ds_infos=self._dataset.get_dataset_infos(keys=['directed']), + **self._kernel_options) else: - raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet".') + raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet", "WeisfeilerLehman".') # def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): diff --git a/gklearn/tests/test_graph_kernels.py b/gklearn/tests/test_graph_kernels.py index a92ebe9..bb442f3 100644 --- a/gklearn/tests/test_graph_kernels.py +++ b/gklearn/tests/test_graph_kernels.py @@ -233,7 +233,7 @@ def test_PathUpToH(ds_name, parallel, k_func, compute_method): @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS']) @pytest.mark.parametrize('parallel', ['imap_unordered', None]) -def test_treeletkernel(ds_name, parallel): +def test_Treelet(ds_name, parallel): """Test treelet kernel. """ from gklearn.kernels import Treelet @@ -258,28 +258,30 @@ def test_treeletkernel(ds_name, parallel): assert False, exception -# @pytest.mark.parametrize('ds_name', ['Acyclic']) -# #@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge']) -# @pytest.mark.parametrize('base_kernel', ['subtree']) -# @pytest.mark.parametrize('parallel', ['imap_unordered', None]) -# def test_weisfeilerlehmankernel(ds_name, parallel, base_kernel): -# """Test Weisfeiler-Lehman kernel. -# """ -# from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel +@pytest.mark.parametrize('ds_name', ['Acyclic']) +#@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge']) +@pytest.mark.parametrize('base_kernel', ['subtree']) +@pytest.mark.parametrize('parallel', ['imap_unordered', None]) +def test_WeisfeilerLehman(ds_name, parallel, base_kernel): + """Test Weisfeiler-Lehman kernel. 
+ """ + from gklearn.kernels import WeisfeilerLehman -# Gn, y = chooseDataset(ds_name) + dataset = chooseDataset(ds_name) -# try: -# Kmatrix, run_time = weisfeilerlehmankernel(Gn, -# node_label='atom', -# edge_label='bond_type', -# height=2, -# base_kernel=base_kernel, -# parallel=parallel, -# n_jobs=multiprocessing.cpu_count(), -# verbose=True) -# except Exception as exception: -# assert False, exception + try: + graph_kernel = WeisfeilerLehman(node_labels=dataset.node_labels, + edge_labels=dataset.edge_labels, + ds_infos=dataset.get_dataset_infos(keys=['directed']), + height=2, base_kernel=base_kernel) + gram_matrix, run_time = graph_kernel.compute(dataset.graphs, + parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) + kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], + parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) + kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], + parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) + except Exception as exception: + assert False, exception if __name__ == "__main__":
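
Usage sketch (illustrative, not part of the patch): the test above exercises the new class end to end. A minimal standalone call might look as follows, where `dataset` is assumed to be a gklearn Dataset exposing `graphs`, `node_labels`, `edge_labels` and `get_dataset_infos` (as returned by `chooseDataset` in the tests); only the `WeisfeilerLehman` constructor and `compute` calls are taken directly from this patch.

import multiprocessing
from gklearn.kernels import WeisfeilerLehman

# `dataset` is assumed to be loaded beforehand via gklearn's dataset utilities.
graph_kernel = WeisfeilerLehman(node_labels=dataset.node_labels,
								edge_labels=dataset.edge_labels,
								ds_infos=dataset.get_dataset_infos(keys=['directed']),
								height=2, base_kernel='subtree')

# Gram matrix over a whole list of graphs.
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
											 parallel='imap_unordered',
											 n_jobs=multiprocessing.cpu_count(),
											 verbose=True)

# Kernel values between one graph and a list of graphs, without parallelization.
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
											 parallel=None,
											 n_jobs=multiprocessing.cpu_count(),
											 verbose=True)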