@@ -0,0 +1,5 @@
files:
  - source: /**/
    ignore:
      - /datasets/
    translation: /lang/%two_letters_code%/%original_path%/%original_file_name%
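For context, Crowdin expands the `%...%` placeholders per target language: a hypothetical source file `docs/index.rst` would, for a French translation, be written to `/lang/fr/docs/index.rst`. The `ignore` rule keeps the `/datasets/` folder out of translation.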
@@ -7,15 +7,14 @@ A two-layer nested cross-validation (CV) is applied to select and evaluate models.

The machine used to execute the experiments is a cluster with 28 CPU cores of Intel(R) Xeon(R) E5-2680 v4 @ 2.40GHz, 252GB of memory, and the 64-bit operating system CentOS Linux release 7.3.1611. All experiments were run with Python 3.5.2.

The figure below exhibits the accuracies achieved by the graph kernels implemented in the `graphkit-learn` library, in terms of regression error (the upper table; errors of boiling points for the Alkane and Acyclic datasets) and classification rate (the lower table). Each row corresponds to a dataset and each column to a graph kernel. Red indicates the worse results and dark green the best ones. Gray cells with the “inf” marker indicate that the computation of the graph kernel on that dataset is omitted due to much higher consumption of computational resources than the other kernels.

.. image:: figures/all_test_accuracy.svg
   :width: 600
   :alt: accuracies

The figure below displays the computational time consumed to compute the Gram matrix of each graph kernel (in :math:`\log_{10}` of seconds) on each dataset. Color legends have the same meaning as in the figure above.

.. image:: figures/all_ave_gm_times.svg
   :width: 600
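For reference, here is a minimal sketch of this two-layer nested CV on a precomputed Gram matrix, written with scikit-learn; the experiments themselves use gklearn's `model_selection_for_precomputed_kernel`, whose splitting and trial counts differ:

.. code-block:: python

    import numpy as np
    from sklearn.model_selection import GridSearchCV, KFold, cross_val_score
    from sklearn.svm import SVC

    def nested_cv_accuracy(gram_matrix, y, C_grid=np.logspace(-10, 10, num=41)):
        # Inner CV selects C on each training split; the outer CV estimates
        # accuracy, so test folds never influence hyperparameter selection.
        inner = GridSearchCV(SVC(kernel='precomputed'), {'C': C_grid},
                             cv=KFold(n_splits=5, shuffle=True))
        scores = cross_val_score(inner, gram_matrix, y,
                                 cv=KFold(n_splits=10, shuffle=True))
        return scores.mean(), scores.std()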
[SVG figure diff omitted: regenerated markup of a results figure (shifted text coordinates and renamed clip-path ids; dataset row labels such as Mutag, AIDS, and NCI1; several “inf” cells). The only substantive change is the correction of the dataset label “NCI11” to “NCI1”.]
@@ -0,0 +1,196 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Mon Oct 5 16:08:33 2020 | |||
@author: ljia | |||
This script compute classification accuracy of each geaph kernel on datasets | |||
with different entropy of degree distribution. | |||
""" | |||
from utils import Graph_Kernel_List, cross_validate | |||
import numpy as np | |||
import logging | |||
num_nodes = 40 | |||
half_num_graphs = 100 | |||
def generate_graphs(): | |||
# from gklearn.utils.graph_synthesizer import GraphSynthesizer | |||
# gsyzer = GraphSynthesizer() | |||
# graphs = gsyzer.unified_graphs(num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False) | |||
# return graphs | |||
import networkx as nx | |||
degrees11 = [5] * num_nodes | |||
# degrees12 = [2] * num_nodes | |||
degrees12 = [5] * num_nodes | |||
degrees21 = list(range(1, 11)) * 6 | |||
# degrees22 = [5 * i for i in list(range(1, 11)) * 6] | |||
degrees22 = list(range(1, 11)) * 6 | |||
# method 1 | |||
graphs11 = [nx.configuration_model(degrees11, create_using=nx.Graph) for i in range(half_num_graphs)] | |||
graphs12 = [nx.configuration_model(degrees12, create_using=nx.Graph) for i in range(half_num_graphs)] | |||
for g in graphs11: | |||
g.remove_edges_from(nx.selfloop_edges(g)) | |||
for g in graphs12: | |||
g.remove_edges_from(nx.selfloop_edges(g)) | |||
# method 2: can easily generate isomorphic graphs. | |||
# graphs11 = [nx.random_regular_graph(2, num_nodes, seed=None) for i in range(half_num_graphs)] | |||
# graphs12 = [nx.random_regular_graph(10, num_nodes, seed=None) for i in range(half_num_graphs)] | |||
# Add node labels. | |||
for g in graphs11: | |||
for n in g.nodes(): | |||
g.nodes[n]['atom'] = 0 | |||
for g in graphs12: | |||
for n in g.nodes(): | |||
g.nodes[n]['atom'] = 1 | |||
graphs1 = graphs11 + graphs12 | |||
# method 1: the entropy of the two classes is not the same.
graphs21 = [nx.configuration_model(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)] | |||
graphs22 = [nx.configuration_model(degrees22, create_using=nx.Graph) for i in range(half_num_graphs)] | |||
for g in graphs21: | |||
g.remove_edges_from(nx.selfloop_edges(g)) | |||
for g in graphs22: | |||
g.remove_edges_from(nx.selfloop_edges(g)) | |||
# # method 2: too slow, and may fail.
# graphs21 = [nx.random_degree_sequence_graph(degrees21, seed=None, tries=100) for i in range(half_num_graphs)] | |||
# graphs22 = [nx.random_degree_sequence_graph(degrees22, seed=None, tries=100) for i in range(half_num_graphs)] | |||
# # method 3: no randomness. | |||
# graphs21 = [nx.havel_hakimi_graph(degrees21, create_using=None) for i in range(half_num_graphs)] | |||
# graphs22 = [nx.havel_hakimi_graph(degrees22, create_using=None) for i in range(half_num_graphs)] | |||
# # method 4: | |||
# graphs21 = [nx.configuration_model(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)] | |||
# graphs22 = [nx.degree_sequence_tree(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)] | |||
# # method 5: the entropy of the two classes is not the same.
# graphs21 = [nx.expected_degree_graph(degrees21, seed=None, selfloops=False) for i in range(half_num_graphs)] | |||
# graphs22 = [nx.expected_degree_graph(degrees22, seed=None, selfloops=False) for i in range(half_num_graphs)] | |||
# # method 6: seems there is no randomness.
# graphs21 = [nx.random_powerlaw_tree(num_nodes, gamma=3, seed=None, tries=10000) for i in range(half_num_graphs)] | |||
# graphs22 = [nx.random_powerlaw_tree(num_nodes, gamma=3, seed=None, tries=10000) for i in range(half_num_graphs)] | |||
# Add node labels. | |||
for g in graphs21: | |||
for n in g.nodes(): | |||
g.nodes[n]['atom'] = 0 | |||
for g in graphs22: | |||
for n in g.nodes(): | |||
g.nodes[n]['atom'] = 1 | |||
graphs2 = graphs21 + graphs22 | |||
# # check for isomorphism. | |||
# iso_mat1 = np.zeros((len(graphs1), len(graphs1))) | |||
# num1 = 0 | |||
# num2 = 0 | |||
# for i in range(len(graphs1)): | |||
# for j in range(i + 1, len(graphs1)): | |||
# if nx.is_isomorphic(graphs1[i], graphs1[j]): | |||
# iso_mat1[i, j] = 1 | |||
# iso_mat1[j, i] = 1 | |||
# num1 += 1 | |||
# print('iso:', num1, ':', i, ',', j) | |||
# else: | |||
# num2 += 1 | |||
# print('not iso:', num2, ':', i, ',', j) | |||
# | |||
# iso_mat2 = np.zeros((len(graphs2), len(graphs2))) | |||
# num1 = 0 | |||
# num2 = 0 | |||
# for i in range(len(graphs2)): | |||
# for j in range(i + 1, len(graphs2)): | |||
# if nx.is_isomorphic(graphs2[i], graphs2[j]): | |||
# iso_mat2[i, j] = 1 | |||
# iso_mat2[j, i] = 1 | |||
# num1 += 1 | |||
# print('iso:', num1, ':', i, ',', j) | |||
# else: | |||
# num2 += 1 | |||
# print('not iso:', num2, ':', i, ',', j) | |||
return graphs1, graphs2 | |||
def get_infos(graph): | |||
from gklearn.utils import Dataset | |||
ds = Dataset() | |||
ds.load_graphs(graph) | |||
infos = ds.get_dataset_infos(keys=['all_degree_entropy', 'ave_node_degree']) | |||
infos['ave_degree_entropy'] = np.mean(infos['all_degree_entropy']) | |||
print(infos['ave_degree_entropy'], ',', infos['ave_node_degree']) | |||
return infos | |||
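For reference, a minimal sketch of what the `'all_degree_entropy'` statistic presumably computes for each graph, assuming the Shannon entropy of the empirical degree distribution (the gklearn implementation may differ in detail):

.. code-block:: python

    import numpy as np

    def degree_entropy(g):
        # Shannon entropy (in nats) of the empirical degree distribution
        # of a single networkx graph.
        degrees = np.array([d for _, d in g.degree()])
        _, counts = np.unique(degrees, return_counts=True)
        p = counts / counts.sum()
        return -np.sum(p * np.log(p))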
def xp_accuracy_diff_entropy(): | |||
# Generate graphs. | |||
graphs1, graphs2 = generate_graphs() | |||
# Compute entropy of degree distribution of the generated graphs. | |||
info11 = get_infos(graphs1[0:half_num_graphs]) | |||
info12 = get_infos(graphs1[half_num_graphs:]) | |||
info21 = get_infos(graphs2[0:half_num_graphs]) | |||
info22 = get_infos(graphs2[half_num_graphs:]) | |||
# Run and save. | |||
import pickle | |||
import os | |||
save_dir = 'outputs/accuracy_diff_entropy/' | |||
if not os.path.exists(save_dir): | |||
os.makedirs(save_dir) | |||
accuracies = {} | |||
confidences = {} | |||
for kernel_name in Graph_Kernel_List: | |||
print() | |||
print('Kernel:', kernel_name) | |||
accuracies[kernel_name] = [] | |||
confidences[kernel_name] = [] | |||
for set_i, graphs in enumerate([graphs1, graphs2]): | |||
print() | |||
print('Graph set', set_i) | |||
tmp_graphs = [g.copy() for g in graphs] | |||
targets = [0] * half_num_graphs + [1] * half_num_graphs | |||
accuracy = 'error' | |||
confidence = 'error' | |||
try: | |||
accuracy, confidence = cross_validate(tmp_graphs, targets, kernel_name, ds_name=str(set_i), output_dir=save_dir) #, n_jobs=1) | |||
except Exception as exp: | |||
print('An exception occurred when running this experiment:')
LOG_FILENAME = save_dir + 'error.txt' | |||
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) | |||
logging.exception('\n' + kernel_name + ', ' + str(set_i) + ':') | |||
print(repr(exp)) | |||
accuracies[kernel_name].append(accuracy) | |||
confidences[kernel_name].append(confidence) | |||
pickle.dump(accuracy, open(save_dir + 'accuracy.' + kernel_name + '.' + str(set_i) + '.pkl', 'wb')) | |||
pickle.dump(confidence, open(save_dir + 'confidence.' + kernel_name + '.' + str(set_i) + '.pkl', 'wb')) | |||
# Save all. | |||
pickle.dump(accuracies, open(save_dir + 'accuracies.pkl', 'wb')) | |||
pickle.dump(confidences, open(save_dir + 'confidences.pkl', 'wb')) | |||
return | |||
if __name__ == '__main__': | |||
xp_accuracy_diff_entropy() |
@@ -21,14 +21,14 @@ def xp_runtimes_of_all_28cores(): | |||
run_times = {} | |||
for ds_name in Dataset_List:
run_times[ds_name] = []
for kernel_name in Graph_Kernel_List:
print()
print('Dataset:', ds_name)
print('Kernel:', kernel_name)
# get graphs. | |||
graphs, _ = load_predefined_dataset(ds_name) | |||
@@ -43,7 +43,7 @@ def xp_runtimes_of_all_28cores(): | |||
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) | |||
logging.exception('') | |||
print(repr(exp)) | |||
run_times[ds_name].append(run_time) | |||
pickle.dump(run_time, open(save_dir + 'run_time.' + kernel_name + '.' + ds_name + '.pkl', 'wb')) | |||
@@ -20,17 +20,17 @@ def xp_runtimes_diff_chunksizes(): | |||
os.makedirs(save_dir) | |||
run_times = {} | |||
for ds_name in Dataset_List:
run_times[ds_name] = []
for kernel_name in Graph_Kernel_List:
print()
print('Dataset:', ds_name)
print('Kernel:', kernel_name)
run_times[ds_name].append([])
for chunksize in [1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000]: | |||
print() | |||
print('Chunksize:', chunksize) | |||
@@ -48,7 +48,7 @@ def xp_runtimes_diff_chunksizes(): | |||
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) | |||
logging.exception('') | |||
print(repr(exp)) | |||
run_times[ds_name][-1].append(run_time) | |||
pickle.dump(run_time, open(save_dir + 'run_time.' + kernel_name + '.' + ds_name + '.' + str(chunksize) + '.pkl', 'wb')) | |||
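The chunk sizes above are forwarded to the multiprocessing pool that fills the Gram matrix (cf. the `pool.imap_unordered(do_partial, itr, chunksize)` call later in this PR). As a minimal illustration of the parameter being varied (generic Python, not gklearn's internal code):

.. code-block:: python

    import multiprocessing

    def one_task(x):  # stand-in for one pairwise kernel computation
        return x * x

    if __name__ == '__main__':
        with multiprocessing.Pool(4) as pool:
            # A larger chunksize sends bigger task batches to each worker:
            # less scheduling overhead, but coarser load balancing.
            results = list(pool.imap_unordered(one_task, range(10000),
                                               chunksize=100))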
@@ -16,7 +16,7 @@ def generate_graphs(): | |||
return graphs | |||
def xp_synthesized_graphs_dataset_size(): | |||
# Generate graphs. | |||
graphs = generate_graphs() | |||
@@ -43,7 +43,7 @@ def xp_synthesied_graphs_dataset_size(): | |||
run_time = 'error' | |||
try: | |||
gram_matrix, run_time = compute_graph_kernel(sub_graphs, kernel_name) | |||
except Exception as exp: | |||
print('An exception occurred when running this experiment:')
LOG_FILENAME = save_dir + 'error.txt' | |||
@@ -61,4 +61,4 @@ def xp_synthesied_graphs_dataset_size(): | |||
if __name__ == '__main__': | |||
xp_synthesized_graphs_dataset_size() |
@@ -16,7 +16,7 @@ def generate_graphs(degree): | |||
return graphs | |||
def xp_synthesized_graphs_degrees(): | |||
# Run and save. | |||
import pickle | |||
@@ -42,7 +42,7 @@ def xp_synthesied_graphs_degrees(): | |||
# Compute Gram matrix. | |||
run_time = 'error' | |||
try: | |||
gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name) | |||
except Exception as exp: | |||
print('An exception occurred when running this experiment:')
LOG_FILENAME = save_dir + 'error.txt' | |||
@@ -60,4 +60,4 @@ def xp_synthesied_graphs_degrees(): | |||
if __name__ == '__main__': | |||
xp_synthesized_graphs_degrees() |
@@ -16,7 +16,7 @@ def generate_graphs(num_el_alp): | |||
return graphs | |||
def xp_synthesized_graphs_num_edge_label_alphabet(): | |||
# Run and save. | |||
import pickle | |||
@@ -42,7 +42,7 @@ def xp_synthesied_graphs_num_edge_label_alphabet(): | |||
# Compute Gram matrix. | |||
run_time = 'error' | |||
try: | |||
gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name) | |||
except Exception as exp: | |||
print('An exception occurred when running this experiment:')
LOG_FILENAME = save_dir + 'error.txt' | |||
@@ -60,4 +60,4 @@ def xp_synthesied_graphs_num_edge_label_alphabet(): | |||
if __name__ == '__main__': | |||
xp_synthesized_graphs_num_edge_label_alphabet() |
@@ -16,7 +16,7 @@ def generate_graphs(num_nl_alp): | |||
return graphs | |||
def xp_synthesized_graphs_num_node_label_alphabet(): | |||
# Run and save. | |||
import pickle | |||
@@ -42,7 +42,7 @@ def xp_synthesied_graphs_num_node_label_alphabet(): | |||
# Compute Gram matrix. | |||
run_time = 'error' | |||
try: | |||
gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name) | |||
except Exception as exp: | |||
run_times[kernel_name].append('error') | |||
print('An exception occurred when running this experiment:')
@@ -61,4 +61,4 @@ def xp_synthesied_graphs_num_node_label_alphabet(): | |||
if __name__ == '__main__': | |||
xp_synthesized_graphs_num_node_label_alphabet() |
@@ -16,7 +16,7 @@ def generate_graphs(num_nodes): | |||
return graphs | |||
def xp_synthesized_graphs_num_nodes(): | |||
# Run and save. | |||
import pickle | |||
@@ -42,7 +42,7 @@ def xp_synthesied_graphs_num_nodes(): | |||
# Compute Gram matrix. | |||
run_time = 'error' | |||
try: | |||
gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name) | |||
except Exception as exp: | |||
run_times[kernel_name].append('error') | |||
print('An exception occurred when running this experiment:')
@@ -61,4 +61,4 @@ def xp_synthesied_graphs_num_nodes(): | |||
if __name__ == '__main__': | |||
xp_synthesized_graphs_num_nodes() |
@@ -6,6 +6,8 @@ Created on Tue Sep 22 11:33:28 2020 | |||
@author: ljia | |||
""" | |||
import multiprocessing | |||
import numpy as np | |||
from gklearn.utils import model_selection_for_precomputed_kernel | |||
Graph_Kernel_List = ['PathUpToH', 'WLSubtree', 'SylvesterEquation', 'Marginalized', 'ShortestPath', 'Treelet', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'StructuralSP', 'CommonWalk'] | |||
@@ -60,7 +62,7 @@ def compute_graph_kernel(graphs, kernel_name, n_jobs=multiprocessing.cpu_count() | |||
import functools | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||
params = {'compute_method': 'fp', 'weight': 1e-4, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel} | |||
elif kernel_name == 'SpectralDecomposition': | |||
from gklearn.kernels.randomWalkKernel import randomwalkkernel | |||
@@ -109,4 +111,123 @@ def compute_graph_kernel(graphs, kernel_name, n_jobs=multiprocessing.cpu_count() | |||
params['verbose'] = True | |||
results = estimator(graphs, **params) | |||
return results[0], results[1] | |||
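A note on the `weight` parameter :math:`\lambda` used by the walk-based kernels above and in the grids below: the underlying geometric/fixed-point series only converges for sufficiently small :math:`\lambda`, which is presumably why the grids stay in small, log-spaced ranges. For a direct product graph with weight matrix :math:`W_\times`, the standard sufficient condition (Vishwanathan et al., 2010) is

.. math::

    \lambda < \frac{1}{\rho(W_\times)},

where :math:`\rho` denotes the spectral radius.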
def cross_validate(graphs, targets, kernel_name, output_dir='outputs/', ds_name='synthesized', n_jobs=multiprocessing.cpu_count()): | |||
param_grid = None | |||
if kernel_name == 'CommonWalk': | |||
from gklearn.kernels.commonWalkKernel import commonwalkkernel | |||
estimator = commonwalkkernel | |||
param_grid_precomputed = [{'compute_method': ['geo'], | |||
'weight': np.linspace(0.01, 0.15, 15)}] | |||
elif kernel_name == 'Marginalized': | |||
from gklearn.kernels.marginalizedKernel import marginalizedkernel | |||
estimator = marginalizedkernel | |||
param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.9, 9), | |||
'n_iteration': np.linspace(1, 19, 7), | |||
'remove_totters': [False]} | |||
elif kernel_name == 'SylvesterEquation': | |||
from gklearn.kernels.randomWalkKernel import randomwalkkernel | |||
estimator = randomwalkkernel | |||
param_grid_precomputed = {'compute_method': ['sylvester'], | |||
# 'weight': np.linspace(0.01, 0.10, 10)} | |||
'weight': np.logspace(-1, -10, num=10, base=10)} | |||
elif kernel_name == 'ConjugateGradient': | |||
from gklearn.kernels.randomWalkKernel import randomwalkkernel | |||
estimator = randomwalkkernel | |||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
import functools | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||
param_grid_precomputed = {'compute_method': ['conjugate'], | |||
'node_kernels': [sub_kernel], 'edge_kernels': [sub_kernel], | |||
'weight': np.logspace(-1, -10, num=10, base=10)} | |||
elif kernel_name == 'FixedPoint': | |||
from gklearn.kernels.randomWalkKernel import randomwalkkernel | |||
estimator = randomwalkkernel | |||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
import functools | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||
param_grid_precomputed = {'compute_method': ['fp'], | |||
'node_kernels': [sub_kernel], 'edge_kernels': [sub_kernel], | |||
'weight': np.logspace(-3, -10, num=8, base=10)} | |||
elif kernel_name == 'SpectralDecomposition': | |||
from gklearn.kernels.randomWalkKernel import randomwalkkernel | |||
estimator = randomwalkkernel | |||
param_grid_precomputed = {'compute_method': ['spectral'], | |||
'weight': np.logspace(-1, -10, num=10, base=10), | |||
'sub_kernel': ['geo', 'exp']} | |||
elif kernel_name == 'ShortestPath': | |||
from gklearn.kernels.spKernel import spkernel | |||
estimator = spkernel | |||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
import functools | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||
param_grid_precomputed = {'node_kernels': [sub_kernel]} | |||
elif kernel_name == 'StructuralSP': | |||
from gklearn.kernels.structuralspKernel import structuralspkernel | |||
estimator = structuralspkernel | |||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
import functools | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||
param_grid_precomputed = {'node_kernels': [sub_kernel], 'edge_kernels': [sub_kernel], | |||
'compute_method': ['naive']} | |||
elif kernel_name == 'PathUpToH': | |||
from gklearn.kernels.untilHPathKernel import untilhpathkernel | |||
estimator = untilhpathkernel | |||
param_grid_precomputed = {'depth': np.linspace(1, 10, 10),
'k_func': ['MinMax', 'tanimoto'],
'compute_method': ['trie']}
elif kernel_name == 'Treelet': | |||
from gklearn.kernels.treeletKernel import treeletkernel | |||
estimator = treeletkernel | |||
from gklearn.utils.kernels import gaussiankernel, polynomialkernel
import functools | |||
gkernels = [functools.partial(gaussiankernel, gamma=1 / ga) | |||
# for ga in np.linspace(1, 10, 10)] | |||
for ga in np.logspace(0, 10, num=11, base=10)] | |||
pkernels = [functools.partial(polynomialkernel, d=d, c=c) for d in range(1, 11) | |||
for c in np.logspace(0, 10, num=11, base=10)] | |||
param_grid_precomputed = {'sub_kernel': pkernels + gkernels} | |||
elif kernel_name == 'WLSubtree': | |||
from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel | |||
estimator = weisfeilerlehmankernel | |||
param_grid_precomputed = {'base_kernel': ['subtree'], | |||
'height': np.linspace(0, 10, 11)} | |||
param_grid = {'C': np.logspace(-10, 4, num=29, base=10)} | |||
if param_grid is None: | |||
param_grid = {'C': np.logspace(-10, 10, num=41, base=10)} | |||
results = model_selection_for_precomputed_kernel( | |||
graphs, | |||
estimator, | |||
param_grid_precomputed, | |||
param_grid, | |||
'classification', | |||
NUM_TRIALS=28, | |||
datafile_y=targets, | |||
extra_params=None, | |||
ds_name=ds_name, | |||
output_dir=output_dir, | |||
n_jobs=n_jobs, | |||
read_gm_from_file=False, | |||
verbose=True) | |||
return results[0], results[1] |
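A hypothetical invocation of `cross_validate` as defined above (the graph list, targets, and dataset name are placeholders):

.. code-block:: python

    # graphs: list of networkx graphs; targets: list of 0/1 class labels.
    accuracy, confidence = cross_validate(graphs, targets, 'PathUpToH',
                                          output_dir='outputs/', ds_name='demo')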
@@ -1,5 +1,5 @@ | |||
# -*-coding:utf-8 -*- | |||
"""gklearn - kernels module | |||
"""gklearn - graph kernels module | |||
""" | |||
# info | |||
@@ -10,9 +10,12 @@ __date__ = "November 2018" | |||
from gklearn.kernels.graph_kernel import GraphKernel | |||
from gklearn.kernels.common_walk import CommonWalk | |||
from gklearn.kernels.marginalized import Marginalized | |||
from gklearn.kernels.random_walk import RandomWalk | |||
from gklearn.kernels.random_walk_meta import RandomWalkMeta | |||
from gklearn.kernels.sylvester_equation import SylvesterEquation | |||
from gklearn.kernels.conjugate_gradient import ConjugateGradient | |||
from gklearn.kernels.fixed_point import FixedPoint | |||
from gklearn.kernels.spectral_decomposition import SpectralDecomposition | |||
from gklearn.kernels.shortest_path import ShortestPath | |||
from gklearn.kernels.structural_sp import StructuralSP | |||
from gklearn.kernels.path_up_to_h import PathUpToH | |||
@@ -30,15 +30,15 @@ def commonwalkkernel(*args, | |||
n_jobs=None, | |||
chunksize=None, | |||
verbose=True): | |||
"""Calculate common walk graph kernels between graphs. | |||
"""Compute common walk graph kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are computed. | |||
G1, G2 : NetworkX graphs | |||
Two graphs between which the kernel is computed. | |||
node_label : string | |||
Node attribute used as symbolic label. The default node label is 'atom'. | |||
edge_label : string | |||
@@ -133,7 +133,7 @@ def commonwalkkernel(*args, | |||
# | |||
# for i, j, kernel in tqdm( | |||
# pool.imap_unordered(do_partial, itr, chunksize), | |||
# desc='Computing kernels',
# desc='computing kernels', | |||
# file=sys.stdout): | |||
# Kmatrix[i][j] = kernel | |||
# Kmatrix[j][i] = kernel | |||
@@ -145,14 +145,14 @@ def commonwalkkernel(*args, | |||
# # direct product graph method - exponential | |||
# itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||
# if compute_method == 'exp': | |||
# for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout): | |||
# Kmatrix[i][j] = _commonwalkkernel_exp(Gn[i], Gn[j], node_label, | |||
# edge_label, weight) | |||
# Kmatrix[j][i] = Kmatrix[i][j] | |||
# | |||
# # direct product graph method - geometric | |||
# elif compute_method == 'geo': | |||
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout): | |||
# for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout): | |||
# Kmatrix[i][j] = _commonwalkkernel_geo(Gn[i], Gn[j], node_label, | |||
# edge_label, weight) | |||
# Kmatrix[j][i] = Kmatrix[i][j] | |||
@@ -161,7 +161,7 @@ def commonwalkkernel(*args, | |||
# # search all paths use brute force. | |||
# elif compute_method == 'brute': | |||
# n = int(n) | |||
# # get all paths of all graphs before computing kernels to save time, but this may cost a lot of memory for large dataset. | |||
# all_walks = [ | |||
# find_all_walks_until_length(Gn[i], n, node_label, edge_label) | |||
# for i in range(0, len(Gn)) | |||
@@ -185,13 +185,13 @@ def commonwalkkernel(*args, | |||
def _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta): | |||
"""Calculate walk graph kernels up to n between 2 graphs using exponential | |||
"""Compute walk graph kernels up to n between 2 graphs using exponential | |||
series. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are computed. | |||
node_label : string | |||
Node attribute used as label. | |||
edge_label : string | |||
@@ -259,13 +259,13 @@ def wrapper_cw_exp(node_label, edge_label, beta, itr): | |||
def _commonwalkkernel_geo(g1, g2, node_label, edge_label, gamma): | |||
"""Calculate common walk graph kernels up to n between 2 graphs using | |||
"""Compute common walk graph kernels up to n between 2 graphs using | |||
geometric series. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are computed. | |||
node_label : string | |||
Node attribute used as label. | |||
edge_label : string | |||
@@ -304,7 +304,7 @@ def _commonwalkkernel_brute(walks1, | |||
node_label='atom', | |||
edge_label='bond_type', | |||
labeled=True): | |||
"""Calculate walk graph kernels up to n between 2 graphs. | |||
"""Compute walk graph kernels up to n between 2 graphs. | |||
Parameters | |||
---------- | |||
@@ -46,7 +46,7 @@ class CommonWalk(GraphKernel): | |||
from itertools import combinations_with_replacement | |||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||
if self._verbose >= 2: | |||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = itr | |||
@@ -102,7 +102,7 @@ class CommonWalk(GraphKernel): | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
@@ -148,7 +148,7 @@ class CommonWalk(GraphKernel): | |||
len_itr = len(g_list) | |||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | |||
init_worker=_init_worker_list, glbv=(g1, g_list), method='imap_unordered', | |||
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||
return kernel_list | |||
@@ -179,13 +179,13 @@ class CommonWalk(GraphKernel): | |||
def __kernel_do_exp(self, g1, g2, beta): | |||
"""Calculate common walk graph kernel between 2 graphs using exponential | |||
"""Compute common walk graph kernel between 2 graphs using exponential | |||
series. | |||
Parameters | |||
---------- | |||
g1, g2 : NetworkX graphs | |||
Graphs between which the kernels are computed. | |||
beta : integer | |||
Weight. | |||
@@ -231,13 +231,13 @@ class CommonWalk(GraphKernel): | |||
def __kernel_do_geo(self, g1, g2, gamma): | |||
"""Calculate common walk graph kernel between 2 graphs using geometric | |||
"""Compute common walk graph kernel between 2 graphs using geometric | |||
series. | |||
Parameters | |||
---------- | |||
g1, g2 : NetworkX graphs | |||
Graphs between which the kernels are computed. | |||
gamma : integer | |||
Weight. | |||
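For reference, the two series the common walk kernel sums over the direct product graph :math:`G_\times` with adjacency matrix :math:`A_\times` are, up to implementation details,

.. math::

    k_{\exp}(G_1, G_2) = \sum_{i,j} \left[e^{\beta A_\times}\right]_{ij}, \qquad
    k_{\mathrm{geo}}(G_1, G_2) = \sum_{i,j} \left[(I - \gamma A_\times)^{-1}\right]_{ij},

with the weights :math:`\beta` and :math:`\gamma` as in the docstrings above; the geometric series only converges for sufficiently small :math:`\gamma`.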
@@ -0,0 +1,322 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Thu Aug 20 16:09:51 2020 | |||
@author: ljia | |||
@references: | |||
[1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010. | |||
""" | |||
import sys | |||
from tqdm import tqdm | |||
import numpy as np | |||
import networkx as nx | |||
from scipy.sparse import identity | |||
from scipy.sparse.linalg import cg | |||
from gklearn.utils.parallel import parallel_gm, parallel_me | |||
from gklearn.kernels import RandomWalkMeta | |||
from gklearn.utils.utils import compute_vertex_kernels | |||
class ConjugateGradient(RandomWalkMeta): | |||
def __init__(self, **kwargs): | |||
super().__init__(**kwargs) | |||
self._node_kernels = kwargs.get('node_kernels', None) | |||
self._edge_kernels = kwargs.get('edge_kernels', None) | |||
self._node_labels = kwargs.get('node_labels', []) | |||
self._edge_labels = kwargs.get('edge_labels', []) | |||
self._node_attrs = kwargs.get('node_attrs', []) | |||
self._edge_attrs = kwargs.get('edge_attrs', []) | |||
def _compute_gm_series(self): | |||
self._check_edge_weight(self._graphs, self._verbose) | |||
self._check_graphs(self._graphs) | |||
lmda = self._weight | |||
# Compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
# Reindex nodes using consecutive integers for the convenience of kernel computation. | |||
if self._verbose >= 2: | |||
iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout) | |||
else: | |||
iterator = self._graphs | |||
self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator] | |||
if self._p is None and self._q is None: # p and q are uniform distributions as default. | |||
from itertools import combinations_with_replacement | |||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||
if self._verbose >= 2: | |||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = itr | |||
for i, j in iterator: | |||
kernel = self.__kernel_do(self._graphs[i], self._graphs[j], lmda) | |||
gram_matrix[i][j] = kernel | |||
gram_matrix[j][i] = kernel | |||
else: # @todo | |||
pass | |||
return gram_matrix | |||
def _compute_gm_imap_unordered(self): | |||
self._check_edge_weight(self._graphs, self._verbose) | |||
self._check_graphs(self._graphs) | |||
# Compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
# @todo: parallel this. | |||
# Reindex nodes using consecutive integers for the convenience of kernel computation. | |||
if self._verbose >= 2: | |||
iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout) | |||
else: | |||
iterator = self._graphs | |||
self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator] | |||
if self._p is None and self._q is None: # p and q are uniform distributions as default. | |||
def init_worker(gn_toshare): | |||
global G_gn | |||
G_gn = gn_toshare | |||
do_fun = self._wrapper_kernel_do | |||
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | |||
glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose) | |||
else: # @todo | |||
pass | |||
return gram_matrix | |||
def _compute_kernel_list_series(self, g1, g_list): | |||
self._check_edge_weight(g_list + [g1], self._verbose) | |||
self._check_graphs(g_list + [g1]) | |||
lmda = self._weight | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
# Reindex nodes using consecutive integers for the convenience of kernel computation. | |||
g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal') | |||
if self._verbose >= 2: | |||
iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout) | |||
else: | |||
iterator = g_list | |||
g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator] | |||
if self._p is None and self._q is None: # p and q are uniform distributions as default. | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
for i in iterator: | |||
kernel = self.__kernel_do(g1, g_list[i], lmda) | |||
kernel_list[i] = kernel | |||
else: # @todo | |||
pass | |||
return kernel_list | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
self._check_edge_weight(g_list + [g1], self._verbose) | |||
self._check_graphs(g_list + [g1]) | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
# Reindex nodes using consecutive integers for the convenience of kernel computation. | |||
g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal') | |||
# @todo: parallel this. | |||
if self._verbose >= 2: | |||
iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout) | |||
else: | |||
iterator = g_list | |||
g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator] | |||
if self._p is None and self._q is None: # p and q are uniform distributions as default. | |||
def init_worker(g1_toshare, g_list_toshare): | |||
global G_g1, G_g_list | |||
G_g1 = g1_toshare | |||
G_g_list = g_list_toshare | |||
do_fun = self._wrapper_kernel_list_do | |||
def func_assign(result, var_to_assign): | |||
var_to_assign[result[0]] = result[1] | |||
itr = range(len(g_list)) | |||
len_itr = len(g_list) | |||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | |||
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', | |||
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||
else: # @todo | |||
pass | |||
return kernel_list | |||
def _wrapper_kernel_list_do(self, itr): | |||
return itr, self._kernel_do(G_g1, G_g_list[itr], self._weight) | |||
def _compute_single_kernel_series(self, g1, g2): | |||
self._check_edge_weight([g1] + [g2], self._verbose) | |||
self._check_graphs([g1] + [g2]) | |||
lmda = self._weight | |||
# Reindex nodes using consecutive integers for the convenience of kernel computation. | |||
g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal') | |||
g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal') | |||
if self._p is None and self._q is None: # p and q are uniform distributions as default. | |||
kernel = self.__kernel_do(g1, g2, lmda) | |||
else: # @todo | |||
pass | |||
return kernel | |||
def __kernel_do(self, g1, g2, lmda): | |||
# First, compute kernels between all pairs of nodes using the method borrowed
# from FCSP. It is faster than directly computing all edge kernels
# when $d_1 d_2 > 2$, where $d_1$ and $d_2$ are the vertex degrees of the
# graphs compared, which covers most of the cases we encountered. For very
# sparse graphs, this would be slow.
vk_dict = self._compute_vertex_kernels(g1, g2) | |||
# Compute the weight matrix of the direct product graph. | |||
w_times, w_dim = self._compute_weight_matrix(g1, g2, vk_dict) | |||
# use uniform distribution if there is no prior knowledge. | |||
p_times_uni = 1 / w_dim | |||
A = identity(w_times.shape[0]) - w_times * lmda | |||
b = np.full((w_dim, 1), p_times_uni) | |||
x, _ = cg(A, b) | |||
# use uniform distribution if there is no prior knowledge. | |||
q_times = np.full((1, w_dim), p_times_uni) | |||
return np.dot(q_times, x) | |||
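In other words, `__kernel_do` evaluates the generalized random walk kernel

.. math::

    k(G_1, G_2) = q_\times^\top \, (I - \lambda W_\times)^{-1} \, p_\times

by solving the linear system :math:`(I - \lambda W_\times)\, x = p_\times` with `scipy.sparse.linalg.cg` instead of forming the inverse explicitly; here :math:`p_\times` and :math:`q_\times` are the uniform starting and stopping distributions.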
def _wrapper_kernel_do(self, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
return i, j, self.__kernel_do(G_gn[i], G_gn[j], self._weight) | |||
def _func_fp(x, p_times, lmda, w_times):
# The fixed-point map x -> p + lambda * (W @ x).
return p_times + lmda * np.dot(w_times, x)
def _compute_vertex_kernels(self, g1, g2): | |||
"""Compute vertex kernels between vertices of two graphs. | |||
""" | |||
return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs) | |||
# @todo: move if out to make it faster. | |||
# @todo: node/edge kernels use direct function rather than dicts. | |||
def _compute_weight_matrix(self, g1, g2, vk_dict): | |||
"""Compute the weight matrix of the direct product graph. | |||
""" | |||
# Define edge kernels. | |||
def compute_ek_11(e1, e2, ke):
e1_labels = [e1[2][el] for el in self._edge_labels]
e2_labels = [e2[2][el] for el in self._edge_labels]
e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
return ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
def compute_ek_10(e1, e2, ke):
e1_labels = [e1[2][el] for el in self._edge_labels]
e2_labels = [e2[2][el] for el in self._edge_labels]
return ke(e1_labels, e2_labels)
def compute_ek_01(e1, e2, ke):
e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
return ke(e1_attrs, e2_attrs)
def compute_ek_00(e1, e2, ke): | |||
return 1 | |||
# Select the proper edge kernel. | |||
if len(self._edge_labels) > 0: | |||
# edge symb and non-symb labeled
if len(self._edge_attrs) > 0: | |||
ke = self._edge_kernels['mix'] | |||
ek_temp = compute_ek_11 | |||
# edge symb labeled | |||
else: | |||
ke = self._edge_kernels['symb'] | |||
ek_temp = compute_ek_10 | |||
else: | |||
# edge non-symb labeled
if len(self._edge_attrs) > 0: | |||
ke = self._edge_kernels['nsymb'] | |||
ek_temp = compute_ek_01 | |||
# edge unlabeled | |||
else: | |||
ke = None | |||
ek_temp = compute_ek_00 # @todo: check how much slower this is.
# Compute the weight matrix. | |||
w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2) | |||
w_times = np.zeros((w_dim, w_dim)) | |||
if vk_dict: # node labeled | |||
if self._ds_infos['directed']: | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])] | |||
else: # undirected | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])] + vk_dict[(e1[0], e2[1])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[0])] | |||
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]] | |||
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0]) | |||
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]] | |||
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]] | |||
else: # node unlabeled | |||
if self._ds_infos['directed']: | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = ek_temp(e1, e2, ke) | |||
else: # undirected | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = ek_temp(e1, e2, ke) | |||
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]] | |||
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0]) | |||
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]] | |||
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]] | |||
return w_times, w_dim |
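For reference, the matrix assembled above is the vertex- and edge-kernel weighted adjacency of the direct product graph: indexing node pairs as :math:`(u_1, u_2) \mapsto u_1 |V_2| + u_2`, each pair of edges :math:`(u_1, v_1) \in E_1` and :math:`(u_2, v_2) \in E_2` contributes

.. math::

    W_\times[(u_1, u_2), (v_1, v_2)] = k_v(u_1, u_2) \, k_e\big((u_1, v_1), (u_2, v_2)\big) \, k_v(v_1, v_2),

with the vertex kernel :math:`k_v` dropped in the node-unlabeled case; for undirected graphs the symmetric entries are filled in as well.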
@@ -14,61 +14,56 @@ import sys | |||
from tqdm import tqdm | |||
import numpy as np | |||
import networkx as nx | |||
from scipy import optimize | |||
from gklearn.utils.parallel import parallel_gm, parallel_me | |||
from gklearn.kernels import RandomWalkMeta | |||
from gklearn.utils.utils import compute_vertex_kernels | |||
class FixedPoint(RandomWalkMeta): | |||
def __init__(self, **kwargs): | |||
super().__init__(**kwargs) | |||
self._node_kernels = kwargs.get('node_kernels', None) | |||
self._edge_kernels = kwargs.get('edge_kernels', None) | |||
self._node_labels = kwargs.get('node_labels', []) | |||
self._edge_labels = kwargs.get('edge_labels', []) | |||
self._node_attrs = kwargs.get('node_attrs', []) | |||
self._edge_attrs = kwargs.get('edge_attrs', []) | |||
def _compute_gm_series(self): | |||
self._check_edge_weight(self._graphs, self._verbose) | |||
self._check_graphs(self._graphs) | |||
lmda = self._weight | |||
# Compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
# Reindex nodes using consecutive integers for the convenience of kernel computation.
if self._verbose >= 2:
iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout)
else:
iterator = self._graphs
self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
if self._p is None and self._q is None: # p and q are uniform distributions as default.
from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2:
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else:
iterator = itr
for i, j in iterator:
kernel = self.__kernel_do(self._graphs[i], self._graphs[j], lmda)
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel
else: # @todo
pass
@@ -76,36 +71,31 @@ class FixedPoint(RandomWalk): | |||
def _compute_gm_imap_unordered(self): | |||
self._check_edge_weight(self._graphs, self._verbose) | |||
self._check_graphs(self._graphs) | |||
# Compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
# @todo: parallel this.
# Reindex nodes using consecutive integers for the convenience of kernel computation.
if self._verbose >= 2:
iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout)
else:
iterator = self._graphs
self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
if self._p is None and self._q is None: # p and q are uniform distributions as default.
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_fun = self._wrapper_kernel_do
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
else: # @todo
pass
@@ -113,39 +103,33 @@ class FixedPoint(RandomWalk): | |||
def _compute_kernel_list_series(self, g1, g_list): | |||
self._check_edge_weight(g_list + [g1], self._verbose) | |||
self._check_graphs(g_list + [g1]) | |||
lmda = self._weight | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
# Reindex nodes using consecutive integers for the convenience of kernel computation. | |||
g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal') | |||
if self._verbose >= 2: | |||
iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout) | |||
else: | |||
iterator = g_list | |||
g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator] | |||
if self._p is None and self._q is None: # p and q are uniform distributions as default.
if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else:
iterator = range(len(g_list))
for i in iterator:
kernel = self.__kernel_do(g1, g_list[i], lmda)
kernel_list[i] = kernel
else: # @todo
pass
@@ -153,43 +137,38 @@ class FixedPoint(RandomWalk): | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
self._check_edge_weight(g_list + [g1], self._verbose) | |||
self._check_graphs(g_list + [g1]) | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
# Reindex nodes using consecutive integers for the convenience of kernel computation.
g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
# @todo: parallel this.
if self._verbose >= 2:
iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout)
else:
iterator = g_list
g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
if self._p is None and self._q is None: # p and q are uniform distributions as default.
def init_worker(g1_toshare, g_list_toshare):
global G_g1, G_g_list
G_g1 = g1_toshare
G_g_list = g_list_toshare
do_fun = self._wrapper_kernel_list_do
def func_assign(result, var_to_assign):
var_to_assign[result[0]] = result[1]
itr = range(len(g_list))
len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
else: # @todo
pass
@@ -197,49 +176,146 @@ class FixedPoint(RandomWalk): | |||
def _wrapper_kernel_list_do(self, itr): | |||
return itr, self._kernel_do(G_A_wave_1, G_A_wave_list[itr], self._weight) | |||
return itr, self._kernel_do(G_g1, G_g_list[itr], self._weight) | |||
def _compute_single_kernel_series(self, g1, g2): | |||
self._check_edge_weight([g1] + [g2]) | |||
self._check_edge_weight([g1] + [g2], self._verbose) | |||
self._check_graphs([g1] + [g2]) | |||
if self._verbose >= 2: | |||
import warnings | |||
warnings.warn('All labels are ignored.') | |||
lmda = self._weight | |||
if self._q == None: | |||
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A_wave_list actually contains the transposes of the adjacency matrices. | |||
A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | |||
A_wave_2 = nx.adjacency_matrix(g2, self._edge_weight).todense().transpose() | |||
if self._p == None: # p is uniform distribution as default. | |||
kernel = self.__kernel_do(A_wave_1, A_wave_2, lmda) | |||
else: # @todo | |||
pass | |||
# Reindex nodes using consecutive integers for the convenience of kernel computation. | |||
g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal') | |||
g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal') | |||
if self._p is None and self._q is None: # p and q are uniform distributions as default. | |||
kernel = self.__kernel_do(g1, g2, lmda) | |||
else: # @todo | |||
pass | |||
return kernel | |||
def __kernel_do(self, A_wave1, A_wave2, lmda): | |||
def __kernel_do(self, g1, g2, lmda): | |||
S = lmda * A_wave2 | |||
T_t = A_wave1 | |||
# First, compute kernels between all pairs of nodes using the method borrowed
# from FCSP. It is faster than directly computing all edge kernels
# when $d_1 d_2 > 2$, where $d_1$ and $d_2$ are vertex degrees of the
# compared graphs, which holds in most cases we encountered. For very
# sparse graphs, this would be slow.
vk_dict = self._compute_vertex_kernels(g1, g2) | |||
# Compute the weight matrix of the direct product graph. | |||
w_times, w_dim = self._compute_weight_matrix(g1, g2, vk_dict) | |||
# use uniform distribution if there is no prior knowledge. | |||
nb_pd = len(A_wave1) * len(A_wave2) | |||
p_times_uni = 1 / nb_pd | |||
M0 = np.full((len(A_wave2), len(A_wave1)), p_times_uni) | |||
X = dlyap(S, T_t, M0) | |||
X = np.reshape(X, (-1, 1), order='F') | |||
p_times_uni = 1 / w_dim | |||
p_times = np.full((w_dim, 1), p_times_uni) | |||
x = optimize.fixed_point(self._func_fp, p_times, args=(p_times, lmda, w_times), xtol=1e-06, maxiter=1000) | |||
# use uniform distribution if there is no prior knowledge. | |||
q_times = np.full((1, nb_pd), p_times_uni) | |||
return np.dot(q_times, X) | |||
q_times = np.full((1, w_dim), p_times_uni) | |||
return np.dot(q_times, x) | |||
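# --------------------------------------------------------------------------
# A minimal, self-contained sketch of the fixed-point formulation solved
# above, x = p + lambda * W x, on a toy weight matrix. All names here are
# illustrative assumptions, not gklearn API.
import numpy as np
from scipy import optimize

w_times = np.array([[0., 1., 0., 0.],
                    [1., 0., 1., 0.],
                    [0., 1., 0., 1.],
                    [0., 0., 1., 0.]])  # toy weight matrix of a product graph
w_dim = w_times.shape[0]
lmda = 0.1  # keep lmda * spectral_radius(w_times) < 1 so the series converges

p_times = np.full((w_dim, 1), 1 / w_dim)  # uniform starting distribution
q_times = np.full((1, w_dim), 1 / w_dim)  # uniform stopping distribution

def func_fp(x, p_times, lmda, w_times):
    return p_times + lmda * np.dot(w_times, x)

x = optimize.fixed_point(func_fp, p_times, args=(p_times, lmda, w_times),
                         xtol=1e-06, maxiter=1000)
kernel = (q_times @ x).item()

# Cross-check against the closed form x = (I - lmda * W)^{-1} p.
x_direct = np.linalg.solve(np.eye(w_dim) - lmda * w_times, p_times)
assert np.isclose(kernel, (q_times @ x_direct).item())
# --------------------------------------------------------------------------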
def _wrapper_kernel_do(self, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
return i, j, self.__kernel_do(G_A_wave_list[i], G_A_wave_list[j], self._weight) | |||
return i, j, self.__kernel_do(G_gn[i], G_gn[j], self._weight) | |||
def _func_fp(self, x, p_times, lmda, w_times):
# Fixed-point map of ref [1]: f(x) = p + lambda * W x. This is passed to
# scipy.optimize.fixed_point as self._func_fp above, so it must take self.
return p_times + lmda * np.dot(w_times, x)
def _compute_vertex_kernels(self, g1, g2): | |||
"""Compute vertex kernels between vertices of two graphs. | |||
""" | |||
return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs) | |||
# @todo: move the `if` branching out of the loops to make this faster.
# @todo: node/edge kernels should use direct functions rather than dicts.
def _compute_weight_matrix(self, g1, g2, vk_dict): | |||
"""Compute the weight matrix of the direct product graph. | |||
""" | |||
# Define edge kernels. | |||
def compute_ek_11(e1, e2, ke): | |||
e1_labels = [e1[2][el] for el in self._edge_labels] | |||
e2_labels = [e2[2][el] for el in self._edge_labels]
e1_attrs = [e1[2][ea] for ea in self._edge_attrs] | |||
e2_attrs = [e2[2][ea] for ea in self._edge_attrs] | |||
return ke(e1_labels, e2_labels, e1_attrs, e2_attrs) | |||
def compute_ek_10(e1, e2, ke): | |||
e1_labels = [e1[2][el] for el in self._edge_labels]
e2_labels = [e2[2][el] for el in self._edge_labels]
return ke(e1_labels, e2_labels) | |||
def compute_ek_01(e1, e2, ke): | |||
e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
return ke(e1_attrs, e2_attrs) | |||
def compute_ek_00(e1, e2, ke): | |||
return 1 | |||
# Select the proper edge kernel. | |||
if len(self._edge_labels) > 0: | |||
# edge symb and non-symb labeled
if len(self._edge_attrs) > 0: | |||
ke = self._edge_kernels['mix'] | |||
ek_temp = compute_ek_11 | |||
# edge symb labeled | |||
else: | |||
ke = self._edge_kernels['symb'] | |||
ek_temp = compute_ek_10 | |||
else: | |||
# edge non-symb labeled
if len(self._edge_attrs) > 0: | |||
ke = self._edge_kernels['nsymb'] | |||
ek_temp = compute_ek_01 | |||
# edge unlabeled | |||
else: | |||
ke = None | |||
ek_temp = compute_ek_00 # @todo: check how much slower this is.
# Compute the weight matrix. | |||
w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2) | |||
w_times = np.zeros((w_dim, w_dim)) | |||
if vk_dict: # node labeled | |||
if self._ds_infos['directed']: | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])] | |||
else: # undirected | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])] + vk_dict[(e1[0], e2[1])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[0])] | |||
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]] | |||
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0]) | |||
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]] | |||
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]] | |||
else: # node unlabeled | |||
if self._ds_infos['directed']: | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = ek_temp(e1, e2, ke) | |||
else: # undirected | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = ek_temp(e1, e2, ke) | |||
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]] | |||
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0]) | |||
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]] | |||
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]] | |||
return w_times, w_dim |
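# --------------------------------------------------------------------------
# A toy check of the index arithmetic above (illustrative, not gklearn API):
# a vertex pair (u, v) of the direct product graph is flattened to
# u * |V(g2)| + v. For unlabeled, undirected graphs (ek_temp == 1, no vertex
# kernels) the weight matrix built this way equals the Kronecker product of
# the adjacency matrices.
import networkx as nx
import numpy as np

g1, g2 = nx.path_graph(3), nx.cycle_graph(3)
n2 = nx.number_of_nodes(g2)
w_dim = nx.number_of_nodes(g1) * n2
w_times = np.zeros((w_dim, w_dim))
for e1 in g1.edges():
    for e2 in g2.edges():
        w_idx = (e1[0] * n2 + e2[0], e1[1] * n2 + e2[1])
        w_times[w_idx] = 1
        w_times[w_idx[1], w_idx[0]] = 1
        w_idx2 = (e1[0] * n2 + e2[1], e1[1] * n2 + e2[0])
        w_times[w_idx2[0], w_idx2[1]] = 1
        w_times[w_idx2[1], w_idx2[0]] = 1

assert np.array_equal(w_times, np.kron(nx.to_numpy_array(g1),
                                       nx.to_numpy_array(g2)))
# --------------------------------------------------------------------------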
@@ -104,7 +104,7 @@ class GraphKernel(object): | |||
if self._parallel == 'imap_unordered': | |||
gram_matrix = self._compute_gm_imap_unordered() | |||
elif self._parallel == None: | |||
elif self._parallel is None: | |||
gram_matrix = self._compute_gm_series() | |||
else: | |||
raise Exception('Parallel mode is not set correctly.') | |||
@@ -130,7 +130,7 @@ class GraphKernel(object): | |||
if self._parallel == 'imap_unordered': | |||
kernel_list = self._compute_kernel_list_imap_unordered(g1, g_list) | |||
elif self._parallel == None: | |||
elif self._parallel is None: | |||
kernel_list = self._compute_kernel_list_series(g1, g_list) | |||
else: | |||
raise Exception('Parallel mode is not set correctly.') | |||
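# --------------------------------------------------------------------------
# A toy sketch of the parallel-mode contract dispatched above (illustrative,
# not gklearn API): parallel=None runs serially, 'imap_unordered' fans out
# over a process pool, anything else raises.
from multiprocessing import Pool

def _square(x):
    return x, x * x

def compute_all(items, parallel=None, n_jobs=2):
    if parallel == 'imap_unordered':
        with Pool(n_jobs) as pool:
            return dict(pool.imap_unordered(_square, items))
    elif parallel is None:
        return dict(map(_square, items))
    else:
        raise Exception('Parallel mode is not set correctly.')

if __name__ == '__main__':
    assert compute_all([1, 2, 3], parallel='imap_unordered') == {1: 1, 2: 4, 3: 9}
# --------------------------------------------------------------------------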
@@ -59,7 +59,7 @@ class Marginalized(GraphKernel): | |||
from itertools import combinations_with_replacement | |||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||
if self._verbose >= 2: | |||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = itr | |||
for i, j in iterator: | |||
@@ -119,7 +119,7 @@ class Marginalized(GraphKernel): | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
for i in iterator: | |||
@@ -165,7 +165,7 @@ class Marginalized(GraphKernel): | |||
len_itr = len(g_list) | |||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | |||
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', | |||
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||
return kernel_list | |||
@@ -184,12 +184,12 @@ class Marginalized(GraphKernel): | |||
def __kernel_do(self, g1, g2): | |||
"""Calculate marginalized graph kernel between 2 graphs. | |||
"""Compute marginalized graph kernel between 2 graphs. | |||
Parameters | |||
---------- | |||
g1, g2 : NetworkX graphs | |||
2 graphs between which the kernel is calculated. | |||
2 graphs between which the kernel is computed. | |||
Return | |||
------ | |||
@@ -212,12 +212,12 @@ class Marginalized(GraphKernel): | |||
# # matrix to save all the R_inf for all pairs of nodes | |||
# R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) | |||
# | |||
# # calculate R_inf with a simple iterative method
# # Compute R_inf with a simple iterative method
# for i in range(1, n_iteration): | |||
# R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2]) | |||
# R_inf_new.fill(r1) | |||
# | |||
# # calculate R_inf for each pair of nodes | |||
# # Compute R_inf for each pair of nodes | |||
# for node1 in g1.nodes(data=True): | |||
# neighbor_n1 = g1[node1[0]] | |||
# # the transition probability distribution in the random walks | |||
@@ -243,7 +243,7 @@ class Marginalized(GraphKernel): | |||
# neighbor2] # ref [1] equation (8) | |||
# R_inf[:] = R_inf_new | |||
# | |||
# # add elements of R_inf up and calculate kernel | |||
# # add elements of R_inf up and compute kernel | |||
# for node1 in g1.nodes(data=True): | |||
# for node2 in g2.nodes(data=True): | |||
# s = p_init_G1 * p_init_G2 * deltakernel( | |||
@@ -288,11 +288,11 @@ class Marginalized(GraphKernel): | |||
deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \ | |||
deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels)) | |||
# calculate R_inf with a simple iterative method
# Compute R_inf with a simple iterative method
for i in range(2, self.__n_iteration + 1): | |||
R_inf_old = R_inf.copy() | |||
# calculate R_inf for each pair of nodes | |||
# Compute R_inf for each pair of nodes | |||
for node1 in g1.nodes(): | |||
neighbor_n1 = g1[node1] | |||
# the transition probability distribution in the random walks | |||
@@ -309,7 +309,7 @@ class Marginalized(GraphKernel): | |||
(t_dict[(node1, node2, neighbor1, neighbor2)] * \ | |||
R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8) | |||
# add elements of R_inf up and calculate kernel | |||
# add elements of R_inf up and compute kernel. | |||
for (n1, n2), value in R_inf.items(): | |||
s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels)) | |||
kernel += s * value # ref [1] equation (6) | |||
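# --------------------------------------------------------------------------
# For orientation: deltakernel above is the Kronecker delta on labels. A
# minimal sketch (gklearn ships its own implementation):
def deltakernel(x, y):
    """Return 1 if the two labels are equal, 0 otherwise."""
    return x == y  # a bool, which acts as 0/1 in the sums above
# --------------------------------------------------------------------------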
@@ -39,15 +39,15 @@ def marginalizedkernel(*args, | |||
n_jobs=None, | |||
chunksize=None, | |||
verbose=True): | |||
"""Calculate marginalized graph kernels between graphs. | |||
"""Compute marginalized graph kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
G1, G2 : NetworkX graphs | |||
Two graphs between which the kernel is calculated. | |||
Two graphs between which the kernel is computed. | |||
node_label : string | |||
Node attribute used as symbolic label. The default node label is 'atom'. | |||
@@ -59,7 +59,7 @@ def marginalizedkernel(*args, | |||
The termination probability in the random walks generating step. | |||
n_iteration : integer | |||
Time of iterations to calculate R_inf. | |||
Number of iterations to compute R_inf.
remove_totters : boolean | |||
Whether to remove totterings by method introduced in [2]. The default | |||
@@ -83,11 +83,11 @@ def marginalizedkernel(*args, | |||
Gn, | |||
attr_names=['node_labeled', 'edge_labeled', 'is_directed'], | |||
node_label=node_label, edge_label=edge_label) | |||
if not ds_attrs['node_labeled'] or node_label == None: | |||
if not ds_attrs['node_labeled'] or node_label is None: | |||
node_label = 'atom' | |||
for G in Gn: | |||
nx.set_node_attributes(G, '0', 'atom') | |||
if not ds_attrs['edge_labeled'] or edge_label == None: | |||
if not ds_attrs['edge_labeled'] or edge_label is None: | |||
edge_label = 'bond_type' | |||
for G in Gn: | |||
nx.set_edge_attributes(G, '0', 'bond_type') | |||
@@ -133,7 +133,7 @@ def marginalizedkernel(*args, | |||
# # ---- direct running, normally use single CPU core. ---- | |||
## pbar = tqdm( | |||
## total=(1 + len(Gn)) * len(Gn) / 2, | |||
## desc='calculating kernels', | |||
## desc='Computing kernels', | |||
## file=sys.stdout) | |||
# for i in range(0, len(Gn)): | |||
# for j in range(i, len(Gn)): | |||
@@ -152,12 +152,12 @@ def marginalizedkernel(*args, | |||
def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): | |||
"""Calculate marginalized graph kernel between 2 graphs. | |||
"""Compute marginalized graph kernel between 2 graphs. | |||
Parameters | |||
---------- | |||
G1, G2 : NetworkX graphs | |||
2 graphs between which the kernel is calculated. | |||
2 graphs between which the kernel is computed. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
@@ -165,7 +165,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): | |||
p_quit : integer | |||
the termination probability in the random walks generating step. | |||
n_iteration : integer | |||
time of iterations to calculate R_inf. | |||
number of iterations to compute R_inf.
Return | |||
------ | |||
@@ -188,12 +188,12 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): | |||
# # matrix to save all the R_inf for all pairs of nodes | |||
# R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) | |||
# | |||
# # calculate R_inf with a simple iterative method
# # Compute R_inf with a simple iterative method
# for i in range(1, n_iteration): | |||
# R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2]) | |||
# R_inf_new.fill(r1) | |||
# | |||
# # calculate R_inf for each pair of nodes | |||
# # Compute R_inf for each pair of nodes | |||
# for node1 in g1.nodes(data=True): | |||
# neighbor_n1 = g1[node1[0]] | |||
# # the transition probability distribution in the random walks | |||
@@ -219,7 +219,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): | |||
# neighbor2] # ref [1] equation (8) | |||
# R_inf[:] = R_inf_new | |||
# | |||
# # add elements of R_inf up and calculate kernel | |||
# # add elements of R_inf up and compute kernel. | |||
# for node1 in g1.nodes(data=True): | |||
# for node2 in g2.nodes(data=True): | |||
# s = p_init_G1 * p_init_G2 * deltakernel( | |||
@@ -267,11 +267,11 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): | |||
neighbor_n1[neighbor1][edge_label], | |||
neighbor_n2[neighbor2][edge_label]) | |||
# calculate R_inf with a simple iterative method
# Compute R_inf with a simple iterative method
for i in range(2, n_iteration + 1): | |||
R_inf_old = R_inf.copy() | |||
# calculate R_inf for each pair of nodes | |||
# Compute R_inf for each pair of nodes | |||
for node1 in g1.nodes(): | |||
neighbor_n1 = g1[node1] | |||
# the transition probability distribution in the random walks | |||
@@ -288,7 +288,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): | |||
(t_dict[(node1, node2, neighbor1, neighbor2)] * \ | |||
R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8) | |||
# add elements of R_inf up and calculate kernel | |||
# add elements of R_inf up and compute kernel. | |||
for (n1, n2), value in R_inf.items(): | |||
s = p_init_G1 * p_init_G2 * deltakernel( | |||
g1.nodes[n1][node_label], g2.nodes[n2][node_label]) | |||
@@ -24,7 +24,7 @@ from gklearn.kernels import GraphKernel | |||
from gklearn.utils import Trie | |||
class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||
class PathUpToH(GraphKernel): # @todo: add support for the case k_func is None
def __init__(self, **kwargs): | |||
GraphKernel.__init__(self) | |||
@@ -43,7 +43,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||
itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||
if self._verbose >= 2: | |||
iterator_ps = tqdm(range(0, len(self._graphs)), desc='getting paths', file=sys.stdout) | |||
iterator_kernel = tqdm(itr_kernel, desc='calculating kernels', file=sys.stdout) | |||
iterator_kernel = tqdm(itr_kernel, desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator_ps = range(0, len(self._graphs)) | |||
iterator_kernel = itr_kernel | |||
@@ -69,7 +69,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||
def _compute_gm_imap_unordered(self): | |||
self.__add_dummy_labels(self._graphs) | |||
# get all paths of all graphs before calculating kernels to save time, | |||
# get all paths of all graphs before computing kernels to save time, | |||
# but this may cost a lot of memory for large datasets. | |||
pool = Pool(self._n_jobs) | |||
itr = zip(self._graphs, range(0, len(self._graphs))) | |||
@@ -123,7 +123,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||
if self._verbose >= 2: | |||
iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout) | |||
iterator_kernel = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||
iterator_kernel = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator_ps = g_list | |||
iterator_kernel = range(len(g_list)) | |||
@@ -149,7 +149,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
self.__add_dummy_labels(g_list + [g1]) | |||
# get all paths of all graphs before calculating kernels to save time, | |||
# get all paths of all graphs before computing kernels to save time, | |||
# but this may cost a lot of memory for large datasets. | |||
pool = Pool(self._n_jobs) | |||
itr = zip(g_list, range(0, len(g_list))) | |||
@@ -190,7 +190,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||
itr = range(len(g_list)) | |||
len_itr = len(g_list) | |||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | |||
init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||
init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||
return kernel_list | |||
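# --------------------------------------------------------------------------
# A minimal sketch of the quantity this kernel compares (assumptions, not
# gklearn API): multisets of node-label sequences along simple paths with at
# most h edges, combined here with a plain dot product of counts; the library
# parameterizes the combination via k_func.
import networkx as nx
from collections import Counter

def paths_up_to_h(g, h):
    """All simple paths of g with at most h edges, as node tuples."""
    frontier = [(v,) for v in g.nodes()]
    paths = list(frontier)
    for _ in range(h):
        frontier = [p + (nb,) for p in frontier for nb in g[p[-1]] if nb not in p]
        paths += frontier
    return paths

def path_kernel_naive(g1, g2, h, label='atom'):
    c1 = Counter(tuple(g1.nodes[v][label] for v in p) for p in paths_up_to_h(g1, h))
    c2 = Counter(tuple(g2.nodes[v][label] for v in p) for p in paths_up_to_h(g2, h))
    return sum(c1[s] * c2[s] for s in c1.keys() & c2.keys())

g1, g2 = nx.path_graph(3), nx.path_graph(4)
for g in (g1, g2):
    nx.set_node_attributes(g, '0', 'atom')  # dummy labels, as in the class above
print(path_kernel_naive(g1, g2, h=2))
# --------------------------------------------------------------------------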
@@ -218,7 +218,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||
def __kernel_do_trie(self, trie1, trie2): | |||
"""Calculate path graph kernels up to depth d between 2 graphs using trie. | |||
"""Compute path graph kernels up to depth d between 2 graphs using trie. | |||
Parameters | |||
---------- | |||
@@ -335,7 +335,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||
def __kernel_do_naive(self, paths1, paths2): | |||
"""Calculate path graph kernels up to depth d between 2 graphs naively. | |||
"""Compute path graph kernels up to depth d between 2 graphs naively. | |||
Parameters | |||
---------- | |||
@@ -37,15 +37,15 @@ def randomwalkkernel(*args, | |||
n_jobs=None, | |||
chunksize=None, | |||
verbose=True): | |||
"""Calculate random walk graph kernels. | |||
"""Compute random walk graph kernels. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
G1, G2 : NetworkX graphs | |||
Two graphs between which the kernel is calculated. | |||
Two graphs between which the kernel is computed. | |||
compute_method : string | |||
Method used to compute kernel. The Following choices are | |||
@@ -125,7 +125,7 @@ def randomwalkkernel(*args, | |||
Gn = [g.copy() for g in Gn] | |||
eweight = None | |||
if edge_weight == None: | |||
if edge_weight is None: | |||
if verbose: | |||
print('\n No edge weight is specified. Set all weights to 1.\n')
else: | |||
@@ -212,12 +212,12 @@ def randomwalkkernel(*args, | |||
############################################################################### | |||
def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True): | |||
"""Calculate walk graph kernels up to n between 2 graphs using Sylvester method. | |||
"""Compute walk graph kernels up to n between 2 graphs using Sylvester method. | |||
Parameters | |||
---------- | |||
G1, G2 : NetworkX graph | |||
Graphs between which the kernel is calculated. | |||
Graphs between which the kernel is computed. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
@@ -230,7 +230,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True | |||
""" | |||
Kmatrix = np.zeros((len(Gn), len(Gn))) | |||
if q == None: | |||
if q is None: | |||
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A_wave_list actually contains the transposes of the adjacency matrices. | |||
A_wave_list = [ | |||
@@ -245,7 +245,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True | |||
# norm = A_tilde.sum(axis=0) | |||
# norm[norm == 0] = 1 | |||
# A_wave_list.append(A_tilde / norm) | |||
if p == None: # p is uniform distribution as default. | |||
if p is None: # p is uniform distribution as default. | |||
def init_worker(Awl_toshare): | |||
global G_Awl | |||
G_Awl = Awl_toshare | |||
@@ -255,7 +255,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True | |||
# pbar = tqdm( | |||
# total=(1 + len(Gn)) * len(Gn) / 2, | |||
# desc='calculating kernels', | |||
# desc='Computing kernels', | |||
# file=sys.stdout) | |||
# for i in range(0, len(Gn)): | |||
# for j in range(i, len(Gn)): | |||
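# --------------------------------------------------------------------------
# A minimal numeric sketch of the Sylvester formulation this method relies
# on, X = lmda * A2 @ X @ A1.T + M0, solved here by vectorization instead of
# the dlyap call (toy matrices; names are illustrative). With column-major
# vec(), vec(A X B) = (B^T kron A) vec(X), so the equation becomes
# (I - lmda * (A1 kron A2)) vec(X) = vec(M0).
import numpy as np

A1 = np.array([[0., 1.],
               [1., 0.]])
A2 = np.array([[0., 1., 0.],
               [1., 0., 1.],
               [0., 1., 0.]])
lmda = 0.1
n1, n2 = A1.shape[0], A2.shape[0]
M0 = np.full((n2, n1), 1 / (n1 * n2))  # uniform start on the product graph

lhs = np.eye(n1 * n2) - lmda * np.kron(A1, A2)
x = np.linalg.solve(lhs, M0.flatten(order='F'))
q_times = np.full(n1 * n2, 1 / (n1 * n2))  # uniform stopping distribution
print(np.dot(q_times, x))
# --------------------------------------------------------------------------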
@@ -300,12 +300,12 @@ def _se_do(A_wave1, A_wave2, lmda): | |||
############################################################################### | |||
def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | |||
node_label, edge_label, eweight, n_jobs, chunksize, verbose=True): | |||
"""Calculate walk graph kernels up to n between 2 graphs using conjugate method. | |||
"""Compute walk graph kernels up to n between 2 graphs using conjugate method. | |||
Parameters | |||
---------- | |||
G1, G2 : NetworkX graph | |||
Graphs between which the kernel is calculated. | |||
Graphs between which the kernel is computed. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
@@ -321,14 +321,14 @@ def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | |||
# if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \ | |||
# not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] < 1: | |||
# # this is faster from unlabeled graphs. @todo: why? | |||
# if q == None: | |||
# if q is None: | |||
# # don't normalize adjacency matrices if q is a uniform vector. Note | |||
# # A_wave_list actually contains the transposes of the adjacency matrices. | |||
# A_wave_list = [ | |||
# nx.adjacency_matrix(G, eweight).todense().transpose() for G in | |||
# tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout) | |||
# ] | |||
# if p == None: # p is uniform distribution as default. | |||
# if p is None: # p is uniform distribution as default. | |||
# def init_worker(Awl_toshare): | |||
# global G_Awl | |||
# G_Awl = Awl_toshare | |||
@@ -336,23 +336,23 @@ def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | |||
# parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||
# glbv=(A_wave_list,), n_jobs=n_jobs) | |||
# else: | |||
# reindex nodes using consecutive integers for convenience of kernel calculation. | |||
# reindex nodes using consecutive integers for convenience of kernel computation. | |||
Gn = [nx.convert_node_labels_to_integers( | |||
g, first_label=0, label_attribute='label_orignal') for g in (tqdm( | |||
Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)] | |||
if p == None and q == None: # p and q are uniform distributions as default. | |||
if p is None and q is None: # p and q are uniform distributions as default. | |||
def init_worker(gn_toshare): | |||
global G_gn | |||
G_gn = gn_toshare | |||
do_partial = partial(wrapper_cg_labled_do, ds_attrs, node_kernels, | |||
do_partial = partial(wrapper_cg_labeled_do, ds_attrs, node_kernels, | |||
node_label, edge_kernels, edge_label, lmda) | |||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||
glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | |||
# pbar = tqdm( | |||
# total=(1 + len(Gn)) * len(Gn) / 2, | |||
# desc='calculating kernels', | |||
# desc='Computing kernels', | |||
# file=sys.stdout) | |||
# for i in range(0, len(Gn)): | |||
# for j in range(i, len(Gn)): | |||
@@ -382,24 +382,24 @@ def _cg_unlabled_do(A_wave1, A_wave2, lmda): | |||
return np.dot(q_times, x) | |||
def wrapper_cg_labled_do(ds_attrs, node_kernels, node_label, edge_kernels, | |||
def wrapper_cg_labeled_do(ds_attrs, node_kernels, node_label, edge_kernels, | |||
edge_label, lmda, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
return i, j, _cg_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels, | |||
return i, j, _cg_labeled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels, | |||
node_label, edge_kernels, edge_label, lmda) | |||
def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label, | |||
def _cg_labeled_do(g1, g2, ds_attrs, node_kernels, node_label, | |||
edge_kernels, edge_label, lmda): | |||
# First, compute kernels between all pairs of nodes, method borrowed
# First, compute kernels between all pairs of nodes using the method borrowed
# from FCSP. It is faster than directly computing all edge kernels
# when $d_1 d_2 > 2$, where $d_1$ and $d_2$ are vertex degrees of the
# compared graphs, which holds in most cases we encountered. For very
# sparse graphs, this would be slow.
vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label) | |||
# Compute weight matrix of the direct product graph. | |||
# Compute the weight matrix of the direct product graph. | |||
w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs, | |||
edge_kernels, edge_label) | |||
# use uniform distribution if there is no prior knowledge. | |||
@@ -415,12 +415,12 @@ def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label, | |||
############################################################################### | |||
def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | |||
node_label, edge_label, eweight, n_jobs, chunksize, verbose=True): | |||
"""Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method. | |||
"""Compute walk graph kernels up to n between 2 graphs using Fixed-Point method. | |||
Parameters | |||
---------- | |||
G1, G2 : NetworkX graph | |||
Graphs between which the kernel is calculated. | |||
Graphs between which the kernel is computed. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
@@ -438,17 +438,17 @@ def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | |||
# if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \ | |||
# not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] > 1: | |||
# # this is faster from unlabeled graphs. @todo: why? | |||
# if q == None: | |||
# if q is None: | |||
# # don't normalize adjacency matrices if q is a uniform vector. Note | |||
# # A_wave_list actually contains the transposes of the adjacency matrices. | |||
# A_wave_list = [ | |||
# nx.adjacency_matrix(G, eweight).todense().transpose() for G in | |||
# tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout) | |||
# ] | |||
# if p == None: # p is uniform distribution as default. | |||
# if p is None: # p is uniform distribution as default. | |||
# pbar = tqdm( | |||
# total=(1 + len(Gn)) * len(Gn) / 2, | |||
# desc='calculating kernels', | |||
# desc='Computing kernels', | |||
# file=sys.stdout) | |||
# for i in range(0, len(Gn)): | |||
# for j in range(i, len(Gn)): | |||
@@ -464,33 +464,33 @@ def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | |||
# Kmatrix[j][i] = Kmatrix[i][j] | |||
# pbar.update(1) | |||
# else: | |||
# reindex nodes using consecutive integers for convenience of kernel calculation. | |||
# reindex nodes using consecutive integers for the convenience of kernel computation. | |||
Gn = [nx.convert_node_labels_to_integers( | |||
g, first_label=0, label_attribute='label_orignal') for g in (tqdm( | |||
Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)] | |||
if p == None and q == None: # p and q are uniform distributions as default. | |||
if p is None and q is None: # p and q are uniform distributions as default. | |||
def init_worker(gn_toshare): | |||
global G_gn | |||
G_gn = gn_toshare | |||
do_partial = partial(wrapper_fp_labled_do, ds_attrs, node_kernels, | |||
do_partial = partial(wrapper_fp_labeled_do, ds_attrs, node_kernels, | |||
node_label, edge_kernels, edge_label, lmda) | |||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||
glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | |||
return Kmatrix | |||
def wrapper_fp_labled_do(ds_attrs, node_kernels, node_label, edge_kernels, | |||
def wrapper_fp_labeled_do(ds_attrs, node_kernels, node_label, edge_kernels, | |||
edge_label, lmda, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
return i, j, _fp_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels, | |||
return i, j, _fp_labeled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels, | |||
node_label, edge_kernels, edge_label, lmda) | |||
def _fp_labled_do(g1, g2, ds_attrs, node_kernels, node_label, | |||
def _fp_labeled_do(g1, g2, ds_attrs, node_kernels, node_label, | |||
edge_kernels, edge_label, lmda): | |||
# First, compute kernels between all pairs of nodes, method borrowed
# First, compute kernels between all pairs of nodes using the method borrowed
# from FCSP. It is faster than directly computing all edge kernels
# when $d_1 d_2 > 2$, where $d_1$ and $d_2$ are vertex degrees of the
# compared graphs, which holds in most cases we encountered. For very
@@ -519,13 +519,13 @@ def func_fp(x, p_times, lmda, w_times): | |||
############################################################################### | |||
def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunksize, verbose=True): | |||
"""Calculate walk graph kernels up to n between 2 unlabeled graphs using | |||
"""Compute walk graph kernels up to n between 2 unlabeled graphs using | |||
spectral decomposition method. Labels will be ignored. | |||
Parameters | |||
---------- | |||
G1, G2 : NetworkX graph | |||
Graphs between which the kernel is calculated. | |||
Graphs between which the kernel is computed. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
@@ -538,7 +538,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk | |||
""" | |||
Kmatrix = np.zeros((len(Gn), len(Gn))) | |||
if q == None: | |||
if q is None: | |||
# precompute the spectral decomposition of each graph. | |||
P_list = [] | |||
D_list = [] | |||
@@ -552,7 +552,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk | |||
P_list.append(ev) | |||
# P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs? | |||
if p == None: # p is uniform distribution as default. | |||
if p is None: # p is uniform distribution as default. | |||
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in Gn] | |||
# q_T_list = [q.T for q in q_list] | |||
def init_worker(q_T_toshare, P_toshare, D_toshare): | |||
@@ -568,7 +568,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk | |||
# pbar = tqdm( | |||
# total=(1 + len(Gn)) * len(Gn) / 2, | |||
# desc='calculating kernels', | |||
# desc='Computing kernels', | |||
# file=sys.stdout) | |||
# for i in range(0, len(Gn)): | |||
# for j in range(i, len(Gn)): | |||
@@ -605,12 +605,12 @@ def _sd_do(q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel): | |||
############################################################################### | |||
def _randomwalkkernel_kron(G1, G2, node_label, edge_label): | |||
"""Calculate walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method. | |||
"""Compute walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method. | |||
Parameters | |||
---------- | |||
G1, G2 : NetworkX graph | |||
Graphs between which the kernel is calculated. | |||
Graphs between which the kernel is computed. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
@@ -692,8 +692,8 @@ def computeVK(g1, g2, ds_attrs, node_kernels, node_label): | |||
def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label): | |||
'''Compute weight matrix of the direct product graph. | |||
''' | |||
"""Compute the weight matrix of the direct product graph. | |||
""" | |||
w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2) | |||
w_times = np.zeros((w_dim, w_dim)) | |||
if vk_dict: # node labeled | |||
@@ -10,85 +10,47 @@ Created on Wed Aug 19 16:55:17 2020 | |||
[1] S. V. N. Vishwanathan, Nicol N. Schraudolph, Risi Kondor, and Karsten M. Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
""" | |||
import sys | |||
from tqdm import tqdm | |||
import numpy as np | |||
import networkx as nx | |||
from gklearn.utils import SpecialLabel | |||
from gklearn.utils.parallel import parallel_gm, parallel_me | |||
from gklearn.utils.utils import direct_product_graph | |||
from gklearn.kernels import GraphKernel | |||
from gklearn.kernels import SylvesterEquation, ConjugateGradient, FixedPoint, SpectralDecomposition | |||
class RandomWalk(GraphKernel): | |||
class RandomWalk(SylvesterEquation, ConjugateGradient, FixedPoint, SpectralDecomposition): | |||
def __init__(self, **kwargs): | |||
GraphKernel.__init__(self) | |||
self._compute_method = kwargs.get('compute_method', None) | |||
self._weight = kwargs.get('weight', 1) | |||
self._p = kwargs.get('p', None) | |||
self._q = kwargs.get('q', None) | |||
self._edge_weight = kwargs.get('edge_weight', None) | |||
self._ds_infos = kwargs.get('ds_infos', {}) | |||
self._compute_method = self._compute_method.lower()
if self._compute_method == 'sylvester': | |||
self._parent = SylvesterEquation | |||
elif self._compute_method == 'conjugate': | |||
self._parent = ConjugateGradient | |||
elif self._compute_method == 'fp': | |||
self._parent = FixedPoint | |||
elif self._compute_method == 'spectral': | |||
self._parent = SpectralDecomposition | |||
elif self._compute_method == 'kon': | |||
raise Exception('This computing method is not completed yet.') | |||
else: | |||
raise Exception('This computing method does not exist. The possible choices include: "sylvester", "conjugate", "fp", "spectral".')
self._parent.__init__(self, **kwargs) | |||
def _compute_gm_series(self): | |||
pass | |||
return self._parent._compute_gm_series(self) | |||
def _compute_gm_imap_unordered(self): | |||
pass | |||
return self._parent._compute_gm_imap_unordered(self) | |||
def _compute_kernel_list_series(self, g1, g_list): | |||
pass | |||
return self._parent._compute_kernel_list_series(self, g1, g_list) | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
pass | |||
return self._parent._compute_kernel_list_imap_unordered(self, g1, g_list) | |||
def _compute_single_kernel_series(self, g1, g2): | |||
pass | |||
def _check_graphs(self, Gn): | |||
# remove graphs with no edges, as no walk can be found in their structures, | |||
# so the weight matrix between such a graph and itself might be zero. | |||
for g in Gn: | |||
if nx.number_of_edges(g) == 0: | |||
raise Exception('Graphs must contain edges to construct weight matrices.') | |||
def _check_edge_weight(self, G0, verbose): | |||
eweight = None | |||
if self._edge_weight == None: | |||
if verbose >= 2: | |||
print('\n No edge weight is specified. Set all weights to 1.\n')
else: | |||
try: | |||
some_weight = list(nx.get_edge_attributes(G0, self._edge_weight).values())[0] | |||
if isinstance(some_weight, (float, int)):
eweight = self._edge_weight | |||
else: | |||
if verbose >= 2: | |||
print('\n Edge weight with name "%s" is not a float or an integer. Set all weights to 1.\n' % self._edge_weight)
except Exception:
if verbose >= 2:
print('\n Edge weight with name "%s" is not found in the edge attributes. Set all weights to 1.\n' % self._edge_weight)
self._edge_weight = eweight | |||
def _add_dummy_labels(self, Gn): | |||
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||
for i in range(len(Gn)): | |||
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
self.__node_labels = [SpecialLabel.DUMMY] | |||
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): | |||
for i in range(len(Gn)): | |||
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
self.__edge_labels = [SpecialLabel.DUMMY] | |||
return self._parent._compute_single_kernel_series(self, g1, g2) |
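# --------------------------------------------------------------------------
# A toy sketch of the dispatch pattern used above (illustrative, not gklearn
# API): store the chosen parent class and call its unbound methods explicitly
# with `self`, bypassing the MRO that multiple inheritance would apply.
class MethodA:
    def run(self):
        return 'A'

class MethodB:
    def run(self):
        return 'B'

class Dispatcher(MethodA, MethodB):
    def __init__(self, method):
        self._parent = {'a': MethodA, 'b': MethodB}[method]

    def run(self):
        return self._parent.run(self)

assert Dispatcher('b').run() == 'B'  # super()/the MRO would have picked MethodA
# --------------------------------------------------------------------------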
@@ -0,0 +1,86 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Aug 19 16:55:17 2020 | |||
@author: ljia | |||
@references: | |||
[1] S. V. N. Vishwanathan, Nicol N. Schraudolph, Risi Kondor, and Karsten M. Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
""" | |||
import networkx as nx | |||
from gklearn.utils import SpecialLabel | |||
from gklearn.kernels import GraphKernel | |||
class RandomWalkMeta(GraphKernel): | |||
def __init__(self, **kwargs): | |||
GraphKernel.__init__(self) | |||
self._weight = kwargs.get('weight', 1) | |||
self._p = kwargs.get('p', None) | |||
self._q = kwargs.get('q', None) | |||
self._edge_weight = kwargs.get('edge_weight', None) | |||
self._ds_infos = kwargs.get('ds_infos', {}) | |||
def _compute_gm_series(self): | |||
pass | |||
def _compute_gm_imap_unordered(self): | |||
pass | |||
def _compute_kernel_list_series(self, g1, g_list): | |||
pass | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
pass | |||
def _compute_single_kernel_series(self, g1, g2): | |||
pass | |||
def _check_graphs(self, Gn): | |||
# remove graphs with no edges, as no walk can be found in their structures, | |||
# so the weight matrix between such a graph and itself might be zero. | |||
for g in Gn: | |||
if nx.number_of_edges(g) == 0: | |||
raise Exception('Graphs must contain edges to construct weight matrices.') | |||
def _check_edge_weight(self, G0, verbose): | |||
eweight = None | |||
if self._edge_weight is None: | |||
if verbose >= 2: | |||
print('\n No edge weight is specified. Set all weights to 1.\n')
else: | |||
try: | |||
some_weight = list(nx.get_edge_attributes(G0, self._edge_weight).values())[0] | |||
if isinstance(some_weight, (float, int)):
eweight = self._edge_weight | |||
else: | |||
if verbose >= 2: | |||
print('\n Edge weight with name "%s" is not a float or an integer. Set all weights to 1.\n' % self._edge_weight)
except Exception:
if verbose >= 2:
print('\n Edge weight with name "%s" is not found in the edge attributes. Set all weights to 1.\n' % self._edge_weight)
self._edge_weight = eweight | |||
def _add_dummy_labels(self, Gn): | |||
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||
for i in range(len(Gn)): | |||
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
self.__node_labels = [SpecialLabel.DUMMY] | |||
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): | |||
for i in range(len(Gn)): | |||
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
self.__edge_labels = [SpecialLabel.DUMMY] |
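# --------------------------------------------------------------------------
# A standalone sketch of the validation logic in _check_edge_weight above
# (illustrative, for a single graph): keep the attribute name only if it
# exists and is numeric, otherwise fall back to None (all weights become 1).
import networkx as nx

def check_edge_weight(G, edge_weight, verbose=2):
    if edge_weight is None:
        return None
    try:
        some_weight = list(nx.get_edge_attributes(G, edge_weight).values())[0]
    except IndexError:
        if verbose >= 2:
            print('Edge weight "%s" is not found in the edge attributes. '
                  'Set all weights to 1.' % edge_weight)
        return None
    if isinstance(some_weight, (float, int)):
        return edge_weight
    if verbose >= 2:
        print('Edge weight "%s" is not a float or an integer. '
              'Set all weights to 1.' % edge_weight)
    return None

g = nx.Graph()
g.add_edge(0, 1, bond_type=1.0)
assert check_edge_weight(g, 'bond_type') == 'bond_type'
assert check_edge_weight(g, 'missing') is None
# --------------------------------------------------------------------------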
@@ -47,7 +47,7 @@ class ShortestPath(GraphKernel): | |||
from itertools import combinations_with_replacement | |||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||
if self._verbose >= 2: | |||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = itr | |||
for i, j in iterator: | |||
@@ -102,7 +102,7 @@ class ShortestPath(GraphKernel): | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
for i in iterator: | |||
@@ -145,7 +145,7 @@ class ShortestPath(GraphKernel): | |||
itr = range(len(g_list)) | |||
len_itr = len(g_list) | |||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | |||
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||
return kernel_list | |||
@@ -29,15 +29,15 @@ def spkernel(*args, | |||
n_jobs=None, | |||
chunksize=None, | |||
verbose=True): | |||
"""Calculate shortest-path kernels between graphs. | |||
"""Compute shortest-path kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
G1, G2 : NetworkX graphs | |||
Two graphs between which the kernel is calculated. | |||
Two graphs between which the kernel is computed. | |||
node_label : string | |||
Node attribute used as label. The default node label is atom. | |||
@@ -179,7 +179,7 @@ def spkernel(*args, | |||
# do_partial = partial(spkernel_do, Gn, ds_attrs, node_label, node_kernels) | |||
# itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||
# for i, j, kernel in tqdm( | |||
# pool.map(do_partial, itr), desc='calculating kernels', | |||
# pool.map(do_partial, itr), desc='Computing kernels', | |||
# file=sys.stdout): | |||
# Kmatrix[i][j] = kernel | |||
# Kmatrix[j][i] = kernel | |||
@@ -202,7 +202,7 @@ def spkernel(*args, | |||
# # ---- direct running, normally use single CPU core. ---- | |||
# from itertools import combinations_with_replacement | |||
# itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout): | |||
# for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout): | |||
# kernel = spkernel_do(Gn[i], Gn[j], ds_attrs, node_label, node_kernels) | |||
# Kmatrix[i][j] = kernel | |||
# Kmatrix[j][i] = kernel | |||
@@ -16,19 +16,19 @@ import numpy as np | |||
import networkx as nx | |||
from scipy.sparse import kron | |||
from gklearn.utils.parallel import parallel_gm, parallel_me | |||
from gklearn.kernels import RandomWalk | |||
from gklearn.kernels import RandomWalkMeta | |||
class SpectralDecomposition(RandomWalk): | |||
class SpectralDecomposition(RandomWalkMeta): | |||
def __init__(self, **kwargs): | |||
RandomWalk.__init__(self, **kwargs) | |||
super().__init__(**kwargs) | |||
self._sub_kernel = kwargs.get('sub_kernel', None) | |||
def _compute_gm_series(self): | |||
self._check_edge_weight(self._graphs) | |||
self._check_edge_weight(self._graphs, self._verbose) | |||
self._check_graphs(self._graphs) | |||
if self._verbose >= 2: | |||
import warnings | |||
@@ -37,7 +37,7 @@ class SpectralDecomposition(RandomWalk): | |||
# compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
if self._q == None: | |||
if self._q is None: | |||
# precompute the spectral decomposition of each graph. | |||
P_list = [] | |||
D_list = [] | |||
@@ -54,14 +54,14 @@ class SpectralDecomposition(RandomWalk): | |||
P_list.append(ev) | |||
# P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs? | |||
if self._p == None: # p is uniform distribution as default. | |||
if self._p is None: # p is uniform distribution as default. | |||
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs] | |||
# q_T_list = [q.T for q in q_list] | |||
from itertools import combinations_with_replacement | |||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||
if self._verbose >= 2: | |||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = itr | |||
@@ -79,7 +79,7 @@ class SpectralDecomposition(RandomWalk): | |||
def _compute_gm_imap_unordered(self): | |||
self._check_edge_weight(self._graphs) | |||
self._check_edge_weight(self._graphs, self._verbose) | |||
self._check_graphs(self._graphs) | |||
if self._verbose >= 2: | |||
import warnings | |||
@@ -88,7 +88,7 @@ class SpectralDecomposition(RandomWalk): | |||
# compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
if self._q == None: | |||
if self._q is None: | |||
# precompute the spectral decomposition of each graph. | |||
P_list = [] | |||
D_list = [] | |||
@@ -104,7 +104,7 @@ class SpectralDecomposition(RandomWalk): | |||
D_list.append(ew) | |||
P_list.append(ev) # @todo: parallel? | |||
if self._p == None: # p is uniform distribution as default. | |||
if self._p is None: # p is uniform distribution as default. | |||
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs] # @todo: parallel? | |||
def init_worker(q_T_list_toshare, P_list_toshare, D_list_toshare): | |||
@@ -126,7 +126,7 @@ class SpectralDecomposition(RandomWalk): | |||
def _compute_kernel_list_series(self, g1, g_list): | |||
self._check_edge_weight(g_list + [g1]) | |||
self._check_edge_weight(g_list + [g1], self._verbose) | |||
self._check_graphs(g_list + [g1]) | |||
if self._verbose >= 2: | |||
import warnings | |||
@@ -135,16 +135,16 @@ class SpectralDecomposition(RandomWalk): | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
if self._q == None: | |||
if self._q is None: | |||
# precompute the spectral decomposition of each graph. | |||
A1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
D1, P1 = np.linalg.eig(A1) | |||
P_list = [] | |||
D_list = [] | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='spectral decompose', file=sys.stdout) | |||
iterator = tqdm(g_list, desc='spectral decompose', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
iterator = g_list | |||
for G in iterator: | |||
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A actually is the transpose of the adjacency matrix. | |||
@@ -153,11 +153,11 @@ class SpectralDecomposition(RandomWalk): | |||
D_list.append(ew) | |||
P_list.append(ev) | |||
if self._p == None: # p is uniform distribution as default. | |||
if self._p is None: # p is uniform distribution as default. | |||
q_T1 = 1 / nx.number_of_nodes(g1) | |||
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
@@ -174,7 +174,7 @@ class SpectralDecomposition(RandomWalk): | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
self._check_edge_weight(g_list + [g1]) | |||
self._check_edge_weight(g_list + [g1], self._verbose) | |||
self._check_graphs(g_list + [g1]) | |||
if self._verbose >= 2: | |||
import warnings | |||
@@ -183,7 +183,7 @@ class SpectralDecomposition(RandomWalk): | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
if self._q == None: | |||
if self._q is None: | |||
# precompute the spectral decomposition of each graph. | |||
A1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
D1, P1 = np.linalg.eig(A1) | |||
@@ -201,7 +201,7 @@ class SpectralDecomposition(RandomWalk): | |||
D_list.append(ew) | |||
P_list.append(ev) # @todo: parallel? | |||
if self._p == None: # p is uniform distribution as default. | |||
if self._p is None: # p is uniform distribution as default. | |||
q_T1 = 1 / nx.number_of_nodes(g1) | |||
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] # @todo: parallel? | |||
@@ -221,7 +221,7 @@ class SpectralDecomposition(RandomWalk): | |||
itr = range(len(g_list)) | |||
len_itr = len(g_list) | |||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | |||
init_worker=init_worker, glbv=(q_T1, P1, D1, q_T_list, P_list, D_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||
init_worker=init_worker, glbv=(q_T1, P1, D1, q_T_list, P_list, D_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||
else: # @todo | |||
pass | |||
@@ -236,20 +236,20 @@ class SpectralDecomposition(RandomWalk): | |||
def _compute_single_kernel_series(self, g1, g2): | |||
self._check_edge_weight([g1] + [g2]) | |||
self._check_edge_weight([g1] + [g2], self._verbose) | |||
self._check_graphs([g1] + [g2]) | |||
if self._verbose >= 2: | |||
import warnings | |||
warnings.warn('All labels are ignored. Only works for undirected graphs.') | |||
if self._q == None: | |||
if self._q is None: | |||
# precompute the spectral decomposition of each graph. | |||
A1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
D1, P1 = np.linalg.eig(A1)
A2 = nx.adjacency_matrix(g2, weight=self._edge_weight).todense().transpose()
D2, P2 = np.linalg.eig(A2) | |||
if self._p == None: # p is uniform distribution as default. | |||
if self._p is None: # p is uniform distribution as default. | |||
q_T1 = 1 / nx.number_of_nodes(g1) | |||
q_T2 = 1 / nx.number_of_nodes(g2) | |||
kernel = self.__kernel_do(q_T1, q_T2, P1, P2, D1, D2, self._weight, self._sub_kernel) | |||
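# --------------------------------------------------------------------------
# A numeric sketch of the spectral shortcut this class exploits (assumptions:
# symmetric adjacency, uniform p = q, geometric sub-kernel f(d) = 1/(1 - w*d);
# not the library's exact sub_kernel interface). The eigenvalues of
# A1 kron A2 are all pairwise products, so the resolvent reduces to a filter
# on kron(D1, D2).
import numpy as np
import networkx as nx

g1, g2 = nx.path_graph(3), nx.cycle_graph(4)
A1, A2 = nx.to_numpy_array(g1), nx.to_numpy_array(g2)
D1, P1 = np.linalg.eigh(A1)  # symmetric, so P is orthogonal
D2, P2 = np.linalg.eigh(A2)
weight = 0.05  # keep weight * max|kron(D1, D2)| < 1

q_T1 = np.full((1, 3), 1 / 3)
q_T2 = np.full((1, 4), 1 / 4)
left = np.kron(q_T1 @ P1, q_T2 @ P2)      # q^T (P1 kron P2)
flt = 1 / (1 - weight * np.kron(D1, D2))  # geometric series of eigenvalues
kernel = ((left * flt) @ left.T).item()   # q^T (I - weight * A_x)^{-1} p

# Cross-check against the direct resolvent on the product graph.
q_x = np.kron(q_T1, q_T2).flatten()
direct = q_x @ np.linalg.solve(np.eye(12) - weight * np.kron(A1, A2), q_x)
assert np.isclose(kernel, direct)
# --------------------------------------------------------------------------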
@@ -18,7 +18,7 @@ from tqdm import tqdm | |||
# import networkx as nx | |||
import numpy as np | |||
from gklearn.utils.parallel import parallel_gm, parallel_me | |||
from gklearn.utils.utils import get_shortest_paths | |||
from gklearn.utils.utils import get_shortest_paths, compute_vertex_kernels | |||
from gklearn.kernels import GraphKernel | |||
@@ -57,7 +57,7 @@ class StructuralSP(GraphKernel): | |||
from itertools import combinations_with_replacement | |||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||
if self._verbose >= 2: | |||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = itr | |||
if self.__compute_method == 'trie': | |||
@@ -135,7 +135,7 @@ class StructuralSP(GraphKernel): | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
if self.__compute_method == 'trie': | |||
@@ -193,7 +193,7 @@ class StructuralSP(GraphKernel): | |||
itr = range(len(g_list)) | |||
len_itr = len(g_list) | |||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | |||
init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||
init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||
return kernel_list | |||
@@ -273,7 +273,7 @@ class StructuralSP(GraphKernel): | |||
if len(p1) == len(p2): | |||
kernel += 1 | |||
try: | |||
kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average | |||
kernel = kernel / (len(spl1) * len(spl2)) # Compute mean average | |||
except ZeroDivisionError: | |||
print(spl1, spl2) | |||
print(g1.nodes(data=True)) | |||
@@ -318,40 +318,7 @@ class StructuralSP(GraphKernel): | |||
def __get_all_node_kernels(self, g1, g2): | |||
# compute shortest path matrices, method borrowed from FCSP. | |||
vk_dict = {} # shortest path matrices dict | |||
if len(self.__node_labels) > 0: | |||
# node symb and non-symb labeled
if len(self.__node_attrs) > 0: | |||
kn = self.__node_kernels['mix'] | |||
for n1, n2 in product(g1.nodes(data=True), g2.nodes(data=True)): | |||
n1_labels = [n1[1][nl] for nl in self.__node_labels] | |||
n2_labels = [n2[1][nl] for nl in self.__node_labels] | |||
n1_attrs = [n1[1][na] for na in self.__node_attrs] | |||
n2_attrs = [n2[1][na] for na in self.__node_attrs] | |||
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs) | |||
# node symb labeled | |||
else: | |||
kn = self.__node_kernels['symb'] | |||
for n1 in g1.nodes(data=True): | |||
for n2 in g2.nodes(data=True): | |||
n1_labels = [n1[1][nl] for nl in self.__node_labels] | |||
n2_labels = [n2[1][nl] for nl in self.__node_labels] | |||
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels) | |||
else: | |||
# node non-synb labeled | |||
if len(self.__node_attrs) > 0: | |||
kn = self.__node_kernels['nsymb'] | |||
for n1 in g1.nodes(data=True): | |||
for n2 in g2.nodes(data=True): | |||
n1_attrs = [n1[1][na] for na in self.__node_attrs] | |||
n2_attrs = [n2[1][na] for na in self.__node_attrs] | |||
vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs) | |||
# node unlabeled | |||
else: | |||
pass | |||
return vk_dict | |||
return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs) | |||
def __get_all_edge_kernels(self, g1, g2): | |||
@@ -37,15 +37,15 @@ def structuralspkernel(*args, | |||
n_jobs=None, | |||
chunksize=None, | |||
verbose=True): | |||
"""Calculate mean average structural shortest path kernels between graphs. | |||
"""Compute mean average structural shortest path kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
G1, G2 : NetworkX graphs | |||
Two graphs between which the kernel is calculated. | |||
Two graphs between which the kernel is computed. | |||
node_label : string | |||
Node attribute used as label. The default node label is atom. | |||
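As a quick orientation for this function-style kernel, a minimal usage sketch follows. The toy graphs and the inline Dirac/RBF sub-kernels are illustrative assumptions, as are the module path and the ``(Kmatrix, run_time)`` return pair (inferred from the other function-style kernels in this library)::

    import networkx as nx
    import numpy as np
    from gklearn.kernels.structuralspKernel import structuralspkernel  # module path assumed

    def delta(x, y):  # Dirac kernel on symbolic label lists
        return 1.0 if x == y else 0.0

    def gauss(x, y, gamma=1.0):  # RBF kernel on attribute vectors
        x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
        return np.exp(-gamma * np.sum((x - y) ** 2))

    def mix(sl1, sl2, av1, av2):  # 4-argument kernel for mixed labels
        return delta(sl1, sl2) * gauss(av1, av2)

    sub_kernels = {'symb': delta, 'nsymb': gauss, 'mix': mix}

    g1 = nx.Graph()
    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
    g1.add_edge(0, 1, bond_type='1')
    g2 = nx.Graph()
    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'O'})])
    g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])

    Kmatrix, run_time = structuralspkernel([g1, g2], node_label='atom',
                                           edge_label='bond_type',
                                           node_kernels=sub_kernels,
                                           edge_kernels=sub_kernels,
                                           parallel=None, verbose=False)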
@@ -215,7 +215,7 @@ def structuralspkernel(*args, | |||
from itertools import combinations_with_replacement | |||
itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||
if verbose: | |||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = itr | |||
if compute_method == 'trie': | |||
@@ -241,7 +241,7 @@ def structuralspkernel(*args, | |||
# combinations_with_replacement(splist, 2), | |||
# combinations_with_replacement(range(0, len(Gn)), 2)) | |||
# for i, j, kernel in tqdm( | |||
# pool.map(do_partial, itr), desc='calculating kernels', | |||
# pool.map(do_partial, itr), desc='Computing kernels', | |||
# file=sys.stdout): | |||
# Kmatrix[i][j] = kernel | |||
# Kmatrix[j][i] = kernel | |||
@@ -263,7 +263,7 @@ def structuralspkernel(*args, | |||
# with closing(Pool(n_jobs)) as pool: | |||
# for i, j, kernel in tqdm( | |||
# pool.imap_unordered(do_partial, itr, 1000), | |||
# desc='calculating kernels', | |||
# desc='Computing kernels', | |||
# file=sys.stdout): | |||
# Kmatrix[i][j] = kernel | |||
# Kmatrix[j][i] = kernel | |||
@@ -335,7 +335,7 @@ def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label, | |||
if len(p1) == len(p2): | |||
kernel += 1 | |||
try: | |||
kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average | |||
kernel = kernel / (len(spl1) * len(spl2)) # Compute mean average | |||
except ZeroDivisionError: | |||
print(spl1, spl2) | |||
print(g1.nodes(data=True)) | |||
@@ -429,7 +429,7 @@ def ssp_do_trie(g1, g2, trie1, trie2, ds_attrs, node_label, edge_label, | |||
# # compute graph kernels | |||
# traverseBothTrie(trie1[0].root, trie2[0], kernel) | |||
# | |||
# kernel = kernel[0] / (trie1[1] * trie2[1]) # calculate mean average | |||
# kernel = kernel[0] / (trie1[1] * trie2[1]) # Compute mean average | |||
# # traverse all paths in graph1. Depth-first search is applied.
# def traverseBothTrie(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]): | |||
@@ -485,7 +485,7 @@ def ssp_do_trie(g1, g2, trie1, trie2, ds_attrs, node_label, edge_label, | |||
else: | |||
traverseBothTrieu(trie1[0].root, trie2[0], kernel, vk_dict, ek_dict) | |||
kernel = kernel[0] / (trie1[1] * trie2[1]) # calculate mean average | |||
kernel = kernel[0] / (trie1[1] * trie2[1]) # Compute mean average | |||
return kernel | |||
@@ -781,9 +781,9 @@ def get_shortest_paths(G, weight, directed): | |||
Parameters | |||
---------- | |||
G : NetworkX graph
The graphs whose paths are calculated.
The graph whose paths are computed.
weight : string/None | |||
edge attribute used as weight to calculate the shortest path. | |||
edge attribute used as weight to compute the shortest path. | |||
directed : boolean
Whether the graph is directed.
@@ -822,9 +822,9 @@ def get_sps_as_trie(G, weight, directed): | |||
Parameters | |||
---------- | |||
G : NetworkX graph
The graphs whose paths are calculated.
The graph whose paths are computed.
weight : string/None | |||
edge attribute used as weight to calculate the shortest path. | |||
edge attribute used as weight to compute the shortest path. | |||
directed : boolean
Whether the graph is directed.
@@ -16,18 +16,18 @@ import numpy as np | |||
import networkx as nx | |||
from control import dlyap | |||
from gklearn.utils.parallel import parallel_gm, parallel_me | |||
from gklearn.kernels import RandomWalk | |||
from gklearn.kernels import RandomWalkMeta | |||
class SylvesterEquation(RandomWalk): | |||
class SylvesterEquation(RandomWalkMeta): | |||
def __init__(self, **kwargs): | |||
RandomWalk.__init__(self, **kwargs) | |||
super().__init__(**kwargs) | |||
def _compute_gm_series(self): | |||
self._check_edge_weight(self._graphs) | |||
self._check_edge_weight(self._graphs, self._verbose) | |||
self._check_graphs(self._graphs) | |||
if self._verbose >= 2: | |||
import warnings | |||
@@ -38,7 +38,7 @@ class SylvesterEquation(RandomWalk): | |||
# compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
if self._q == None: | |||
if self._q is None: | |||
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A_wave_list actually contains the transposes of the adjacency matrices. | |||
if self._verbose >= 2: | |||
@@ -54,16 +54,16 @@ class SylvesterEquation(RandomWalk): | |||
# norm[norm == 0] = 1 | |||
# A_wave_list.append(A_tilde / norm) | |||
if self._p == None: # p is uniform distribution as default. | |||
if self._p is None: # p is uniform distribution as default. | |||
from itertools import combinations_with_replacement | |||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||
if self._verbose >= 2: | |||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = itr | |||
for i, j in iterator: | |||
kernel = self.__kernel_do(A_wave_list[i], A_wave_list[j], lmda) | |||
kernel = self._kernel_do(A_wave_list[i], A_wave_list[j], lmda) | |||
gram_matrix[i][j] = kernel | |||
gram_matrix[j][i] = kernel | |||
@@ -76,7 +76,7 @@ class SylvesterEquation(RandomWalk): | |||
def _compute_gm_imap_unordered(self): | |||
self._check_edge_weight(self._graphs) | |||
self._check_edge_weight(self._graphs, self._verbose) | |||
self._check_graphs(self._graphs) | |||
if self._verbose >= 2: | |||
import warnings | |||
@@ -85,7 +85,7 @@ class SylvesterEquation(RandomWalk): | |||
# compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
if self._q == None: | |||
if self._q is None: | |||
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A_wave_list actually contains the transposes of the adjacency matrices. | |||
if self._verbose >= 2: | |||
@@ -94,7 +94,7 @@ class SylvesterEquation(RandomWalk): | |||
iterator = self._graphs | |||
A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel? | |||
if self._p == None: # p is uniform distribution as default. | |||
if self._p is None: # p is uniform distribution as default. | |||
def init_worker(A_wave_list_toshare): | |||
global G_A_wave_list | |||
G_A_wave_list = A_wave_list_toshare | |||
@@ -113,7 +113,7 @@ class SylvesterEquation(RandomWalk): | |||
def _compute_kernel_list_series(self, g1, g_list): | |||
self._check_edge_weight(g_list + [g1]) | |||
self._check_edge_weight(g_list + [g1], self._verbose) | |||
self._check_graphs(g_list + [g1]) | |||
if self._verbose >= 2: | |||
import warnings | |||
@@ -124,24 +124,24 @@ class SylvesterEquation(RandomWalk): | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
if self._q == None: | |||
if self._q is None: | |||
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A_wave_list actually contains the transposes of the adjacency matrices. | |||
A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='compute adjacency matrices', file=sys.stdout) | |||
iterator = tqdm(g_list, desc='compute adjacency matrices', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
iterator = g_list | |||
A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] | |||
if self._p == None: # p is uniform distribution as default. | |||
if self._p is None: # p is uniform distribution as default. | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
for i in iterator: | |||
kernel = self.__kernel_do(A_wave_1, A_wave_list[i], lmda) | |||
kernel = self._kernel_do(A_wave_1, A_wave_list[i], lmda) | |||
kernel_list[i] = kernel | |||
else: # @todo | |||
@@ -153,7 +153,7 @@ class SylvesterEquation(RandomWalk): | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
self._check_edge_weight(g_list + [g1]) | |||
self._check_edge_weight(g_list + [g1], self._verbose) | |||
self._check_graphs(g_list + [g1]) | |||
if self._verbose >= 2: | |||
import warnings | |||
@@ -162,17 +162,17 @@ class SylvesterEquation(RandomWalk): | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
if self._q == None: | |||
if self._q is None: | |||
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A_wave_list actually contains the transposes of the adjacency matrices. | |||
A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='compute adjacency matrices', file=sys.stdout) | |||
iterator = tqdm(g_list, desc='compute adjacency matrices', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
iterator = g_list | |||
A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel? | |||
if self._p == None: # p is uniform distribution as default. | |||
if self._p is None: # p is uniform distribution as default. | |||
def init_worker(A_wave_1_toshare, A_wave_list_toshare): | |||
global G_A_wave_1, G_A_wave_list | |||
G_A_wave_1 = A_wave_1_toshare | |||
@@ -186,7 +186,7 @@ class SylvesterEquation(RandomWalk): | |||
len_itr = len(g_list) | |||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | |||
init_worker=init_worker, glbv=(A_wave_1, A_wave_list), method='imap_unordered', | |||
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||
else: # @todo | |||
pass | |||
@@ -201,7 +201,7 @@ class SylvesterEquation(RandomWalk): | |||
def _compute_single_kernel_series(self, g1, g2): | |||
self._check_edge_weight([g1] + [g2]) | |||
self._check_edge_weight([g1] + [g2], self._verbose) | |||
self._check_graphs([g1] + [g2]) | |||
if self._verbose >= 2: | |||
import warnings | |||
@@ -209,13 +209,13 @@ class SylvesterEquation(RandomWalk): | |||
lmda = self._weight | |||
if self._q == None: | |||
if self._q is None: | |||
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A_wave_list actually contains the transposes of the adjacency matrices. | |||
A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | |||
A_wave_2 = nx.adjacency_matrix(g2, self._edge_weight).todense().transpose() | |||
if self._p == None: # p is uniform distribution as default. | |||
kernel = self.__kernel_do(A_wave_1, A_wave_2, lmda) | |||
if self._p is None: # p is uniform distribution as default. | |||
kernel = self._kernel_do(A_wave_1, A_wave_2, lmda) | |||
else: # @todo | |||
pass | |||
else: # @todo | |||
@@ -224,7 +224,7 @@ class SylvesterEquation(RandomWalk): | |||
return kernel | |||
def __kernel_do(self, A_wave1, A_wave2, lmda): | |||
def _kernel_do(self, A_wave1, A_wave2, lmda): | |||
S = lmda * A_wave2 | |||
T_t = A_wave1 | |||
@@ -242,4 +242,4 @@ class SylvesterEquation(RandomWalk): | |||
def _wrapper_kernel_do(self, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
return i, j, self.__kernel_do(G_A_wave_list[i], G_A_wave_list[j], self._weight) | |||
return i, j, self._kernel_do(G_A_wave_list[i], G_A_wave_list[j], self._weight) |
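The hunk above shows only the first two lines of the renamed ``_kernel_do``; as a reading aid for this changeset, here is a self-contained sketch of what it computes. The ``M0``, ``dlyap`` and ``q`` steps are reconstructed assumptions (they are not part of this hunk), relying on the identity vec(S X T^T) = (T (x) S) vec(X)::

    import numpy as np
    from control import dlyap

    def sylvester_rw_kernel(A_wave1, A_wave2, lmda):
        # Random-walk kernel q^T (I - lmda * A (x) A)^{-1} p with uniform p and q,
        # computed without forming the Kronecker product: solve the discrete
        # Sylvester equation S X T_t^T - X + M0 = 0, so that
        # vec(X) = (I - T_t (x) S)^{-1} vec(M0).
        S = lmda * A_wave2
        T_t = A_wave1                          # A_wave matrices hold transposes
        nb_pd = len(A_wave1) * len(A_wave2)    # size of the product graph
        p_uni = 1 / nb_pd                      # uniform starting probability
        M0 = np.full((len(A_wave2), len(A_wave1)), p_uni)
        X = dlyap(S, T_t, M0)                  # discrete Sylvester solve
        X = np.reshape(X, (-1, 1), order='F')  # vec(X), column-major stacking
        q_uni = np.full((1, nb_pd), p_uni)     # uniform stopping distribution
        return float(np.dot(q_uni, X))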
@@ -39,7 +39,7 @@ class Treelet(GraphKernel): | |||
def _compute_gm_series(self): | |||
self.__add_dummy_labels(self._graphs) | |||
# get all canonical keys of all graphs before calculating kernels to save | |||
# get all canonical keys of all graphs before computing kernels to save | |||
# time, but this may cost a lot of memory for large datasets.
canonkeys = [] | |||
if self._verbose >= 2: | |||
@@ -55,7 +55,7 @@ class Treelet(GraphKernel): | |||
from itertools import combinations_with_replacement | |||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||
if self._verbose >= 2: | |||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = itr | |||
for i, j in iterator: | |||
@@ -69,7 +69,7 @@ class Treelet(GraphKernel): | |||
def _compute_gm_imap_unordered(self): | |||
self.__add_dummy_labels(self._graphs) | |||
# get all canonical keys of all graphs before calculating kernels to save | |||
# get all canonical keys of all graphs before computing kernels to save | |||
# time, but this may cost a lot of memory for large datasets.
pool = Pool(self._n_jobs) | |||
itr = zip(self._graphs, range(0, len(self._graphs))) | |||
@@ -105,7 +105,7 @@ class Treelet(GraphKernel): | |||
def _compute_kernel_list_series(self, g1, g_list): | |||
self.__add_dummy_labels(g_list + [g1]) | |||
# get all canonical keys of all graphs before calculating kernels to save | |||
# get all canonical keys of all graphs before computing kernels to save | |||
# time, but this may cost a lot of memory for large datasets.
canonkeys_1 = self.__get_canonkeys(g1) | |||
canonkeys_list = [] | |||
@@ -119,7 +119,7 @@ class Treelet(GraphKernel): | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
for i in iterator: | |||
@@ -132,7 +132,7 @@ class Treelet(GraphKernel): | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
self.__add_dummy_labels(g_list + [g1]) | |||
# get all canonical keys of all graphs before calculating kernels to save | |||
# get all canonical keys of all graphs before computing kernels to save | |||
# time, but this may cost a lot of memory for large datasets.
canonkeys_1 = self.__get_canonkeys(g1) | |||
canonkeys_list = [[] for _ in range(len(g_list))] | |||
@@ -167,7 +167,7 @@ class Treelet(GraphKernel): | |||
len_itr = len(g_list) | |||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | |||
init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered', | |||
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||
return kernel_list | |||
@@ -185,7 +185,7 @@ class Treelet(GraphKernel): | |||
def __kernel_do(self, canonkey1, canonkey2): | |||
"""Calculate treelet graph kernel between 2 graphs. | |||
"""Compute treelet graph kernel between 2 graphs. | |||
Parameters | |||
---------- | |||
@@ -29,15 +29,15 @@ def treeletkernel(*args, | |||
n_jobs=None, | |||
chunksize=None, | |||
verbose=True): | |||
"""Calculate treelet graph kernels between graphs. | |||
"""Compute treelet graph kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
G1, G2 : NetworkX graphs | |||
Two graphs between which the kernel is calculated. | |||
Two graphs between which the kernel is computed. | |||
sub_kernel : function | |||
The sub-kernel between 2 real number vectors. Each vector counts the | |||
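A short hedged usage sketch of the renamed function; the inline RBF sub-kernel, the module path, and the ``(Kmatrix, run_time)`` return pair are assumptions::

    import networkx as nx
    import numpy as np
    from gklearn.kernels.treeletKernel import treeletkernel  # module path assumed

    def gauss(x, y, gamma=1.0):
        # A hypothetical RBF sub-kernel between two treelet count vectors.
        x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
        return np.exp(-gamma * np.sum((x - y) ** 2))

    graphs = [nx.path_graph(n) for n in (3, 4, 5)]
    for g in graphs:
        nx.set_node_attributes(g, '0', 'atom')       # default node label
        nx.set_edge_attributes(g, '0', 'bond_type')  # default edge label

    Kmatrix, run_time = treeletkernel(graphs, sub_kernel=gauss,
                                      node_label='atom', edge_label='bond_type',
                                      parallel=None, verbose=False)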
@@ -89,7 +89,7 @@ def treeletkernel(*args, | |||
# ---- use pool.imap_unordered to parallel and track progress. ---- | |||
if parallel == 'imap_unordered': | |||
# get all canonical keys of all graphs before calculating kernels to save | |||
# get all canonical keys of all graphs before computing kernels to save | |||
# time, but this may cost a lot of memory for large datasets.
pool = Pool(n_jobs) | |||
itr = zip(Gn, range(0, len(Gn))) | |||
@@ -120,8 +120,8 @@ def treeletkernel(*args, | |||
glbv=(canonkeys,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | |||
# ---- do not use parallelization. ---- | |||
elif parallel == None: | |||
# get all canonical keys of all graphs before calculating kernels to save | |||
elif parallel is None: | |||
# get all canonical keys of all graphs before computing kernels to save | |||
# time, but this may cost a lot of memory for large datasets.
canonkeys = [] | |||
for g in (tqdm(Gn, desc='getting canonkeys', file=sys.stdout) if verbose else Gn): | |||
@@ -148,7 +148,7 @@ def treeletkernel(*args, | |||
def _treeletkernel_do(canonkey1, canonkey2, sub_kernel): | |||
"""Calculate treelet graph kernel between 2 graphs. | |||
"""Compute treelet graph kernel between 2 graphs. | |||
Parameters | |||
---------- | |||
@@ -210,7 +210,7 @@ def get_canonkeys(G, node_label, edge_label, labeled, is_directed): | |||
# n-star patterns | |||
patterns['3star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3] | |||
patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4] | |||
patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4] # @todo: check self loop. | |||
patterns['5star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5] | |||
# n-star patterns | |||
canonkey['6'] = len(patterns['3star']) | |||
@@ -34,15 +34,15 @@ def untilhpathkernel(*args, | |||
n_jobs=None, | |||
chunksize=None, | |||
verbose=True): | |||
"""Calculate path graph kernels up to depth/hight h between graphs. | |||
"""Compute path graph kernels up to depth/hight h between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
G1, G2 : NetworkX graphs | |||
Two graphs between which the kernel is calculated. | |||
Two graphs between which the kernel is computed. | |||
node_label : string | |||
Node attribute used as label. The default node label is atom. | |||
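A hedged usage sketch of this kernel; ``depth``, ``k_func`` and ``compute_method`` appear in the code below, while the ``'MinMax'`` choice, the module path, and the ``(Kmatrix, run_time)`` return pair are assumptions::

    import networkx as nx
    from gklearn.kernels.untilHPathKernel import untilhpathkernel  # module path assumed

    graphs = [nx.cycle_graph(4), nx.path_graph(4)]
    for g in graphs:
        nx.set_node_attributes(g, '0', 'atom')
        nx.set_edge_attributes(g, '0', 'bond_type')

    Kmatrix, run_time = untilhpathkernel(graphs, node_label='atom',
                                         edge_label='bond_type', depth=2,
                                         k_func='MinMax', compute_method='naive',
                                         parallel=None, verbose=False)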
@@ -91,7 +91,7 @@ def untilhpathkernel(*args, | |||
attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled', | |||
'edge_attr_dim', 'is_directed'], | |||
node_label=node_label, edge_label=edge_label) | |||
if k_func != None: | |||
if k_func is not None: | |||
if not ds_attrs['node_labeled']: | |||
for G in Gn: | |||
nx.set_node_attributes(G, '0', 'atom') | |||
@@ -103,7 +103,7 @@ def untilhpathkernel(*args, | |||
if parallel == 'imap_unordered': | |||
# ---- use pool.imap_unordered to parallel and track progress. ---- | |||
# get all paths of all graphs before calculating kernels to save time, | |||
# get all paths of all graphs before computing kernels to save time, | |||
# but this may cost a lot of memory for large datasets. | |||
pool = Pool(n_jobs) | |||
itr = zip(Gn, range(0, len(Gn))) | |||
@@ -113,10 +113,10 @@ def untilhpathkernel(*args, | |||
else: | |||
chunksize = 100 | |||
all_paths = [[] for _ in range(len(Gn))] | |||
if compute_method == 'trie' and k_func != None: | |||
if compute_method == 'trie' and k_func is not None: | |||
getps_partial = partial(wrapper_find_all_path_as_trie, depth, | |||
ds_attrs, node_label, edge_label) | |||
elif compute_method != 'trie' and k_func != None: | |||
elif compute_method != 'trie' and k_func is not None: | |||
getps_partial = partial(wrapper_find_all_paths_until_length, depth, | |||
ds_attrs, node_label, edge_label, True) | |||
else: | |||
@@ -133,9 +133,9 @@ def untilhpathkernel(*args, | |||
pool.join() | |||
# for g in Gn: | |||
# if compute_method == 'trie' and k_func != None: | |||
# if compute_method == 'trie' and k_func is not None: | |||
# find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label) | |||
# elif compute_method != 'trie' and k_func != None: | |||
# elif compute_method != 'trie' and k_func is not None: | |||
# find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label) | |||
# else: | |||
# find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label, False) | |||
@@ -155,14 +155,14 @@ def untilhpathkernel(*args, | |||
## all_paths[i] = ps | |||
## print(time.time() - ttt) | |||
if compute_method == 'trie' and k_func != None: | |||
if compute_method == 'trie' and k_func is not None: | |||
def init_worker(trie_toshare): | |||
global G_trie | |||
G_trie = trie_toshare | |||
do_partial = partial(wrapper_uhpath_do_trie, k_func) | |||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||
glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | |||
elif compute_method != 'trie' and k_func != None: | |||
elif compute_method != 'trie' and k_func is not None: | |||
def init_worker(plist_toshare): | |||
global G_plist | |||
G_plist = plist_toshare | |||
@@ -177,7 +177,7 @@ def untilhpathkernel(*args, | |||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||
glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | |||
elif parallel == None: | |||
elif parallel is None: | |||
# from pympler import asizeof | |||
# ---- direct running, normally use single CPU core. ---- | |||
# print(asizeof.asized(all_paths, detail=1).format()) | |||
@@ -195,7 +195,7 @@ def untilhpathkernel(*args, | |||
# print(sizeof_allpaths) | |||
pbar = tqdm( | |||
total=((len(Gn) + 1) * len(Gn) / 2), | |||
desc='calculating kernels', | |||
desc='Computing kernels', | |||
file=sys.stdout) | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
@@ -217,7 +217,7 @@ def untilhpathkernel(*args, | |||
# print(sizeof_allpaths) | |||
pbar = tqdm( | |||
total=((len(Gn) + 1) * len(Gn) / 2), | |||
desc='calculating kernels', | |||
desc='Computing kernels', | |||
file=sys.stdout) | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
@@ -236,7 +236,7 @@ def untilhpathkernel(*args, | |||
def _untilhpathkernel_do_trie(trie1, trie2, k_func): | |||
"""Calculate path graph kernels up to depth d between 2 graphs using trie. | |||
"""Compute path graph kernels up to depth d between 2 graphs using trie. | |||
Parameters | |||
---------- | |||
@@ -351,7 +351,7 @@ def wrapper_uhpath_do_trie(k_func, itr): | |||
def _untilhpathkernel_do_naive(paths1, paths2, k_func): | |||
"""Calculate path graph kernels up to depth d between 2 graphs naively. | |||
"""Compute path graph kernels up to depth d between 2 graphs naively. | |||
Parameters | |||
---------- | |||
@@ -400,7 +400,7 @@ def wrapper_uhpath_do_naive(k_func, itr): | |||
def _untilhpathkernel_do_kernelless(paths1, paths2, k_func): | |||
"""Calculate path graph kernels up to depth d between 2 graphs naively. | |||
"""Compute path graph kernels up to depth d between 2 graphs naively. | |||
Parameters | |||
---------- | |||
@@ -32,15 +32,15 @@ def weisfeilerlehmankernel(*args, | |||
n_jobs=None, | |||
chunksize=None, | |||
verbose=True): | |||
"""Calculate Weisfeiler-Lehman kernels between graphs. | |||
"""Compute Weisfeiler-Lehman kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
G1, G2 : NetworkX graphs | |||
Two graphs between which the kernel is calculated. | |||
Two graphs between which the kernel is computed. | |||
node_label : string | |||
Node attribute used as label. The default node label is atom. | |||
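A hedged usage sketch; ``height`` (the number of WL relabeling iterations) is passed through to ``_wl_kernel_do`` below, while ``base_kernel='subtree'``, the module path, and the ``(Kmatrix, run_time)`` return pair are assumptions::

    import networkx as nx
    from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel  # module path assumed

    graphs = [nx.cycle_graph(5), nx.path_graph(5)]
    for g in graphs:
        nx.set_node_attributes(g, '0', 'atom')

    Kmatrix, run_time = weisfeilerlehmankernel(graphs, node_label='atom',
                                               edge_label='bond_type', height=2,
                                               base_kernel='subtree',
                                               parallel=None, verbose=False)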
@@ -115,12 +115,12 @@ def weisfeilerlehmankernel(*args, | |||
def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksize, verbose): | |||
"""Calculate Weisfeiler-Lehman kernels between graphs. | |||
"""Compute Weisfeiler-Lehman kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
@@ -146,7 +146,7 @@ def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksiz | |||
# number of occurrences of each label in G
all_num_of_each_label.append(dict(Counter(labels_ori))) | |||
# calculate subtree kernel with the 0th iteration and add it to the final kernel | |||
# Compute subtree kernel with the 0th iteration and add it to the final kernel | |||
compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, False) | |||
# iterate each height | |||
@@ -255,7 +255,7 @@ def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksiz | |||
# all_labels_ori.update(labels_comp) | |||
all_num_of_each_label.append(dict(Counter(labels_comp))) | |||
# calculate subtree kernel with h iterations and add it to the final kernel | |||
# Compute subtree kernel with h iterations and add it to the final kernel | |||
compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, False) | |||
return Kmatrix | |||
@@ -316,7 +316,7 @@ def compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, | |||
do_partial = partial(wrapper_compute_subtree_kernel, Kmatrix) | |||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||
glbv=(all_num_of_each_label,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | |||
elif parallel == None: | |||
elif parallel is None: | |||
for i in range(len(Kmatrix)): | |||
for j in range(i, len(Kmatrix)): | |||
Kmatrix[i][j] = compute_subtree_kernel(all_num_of_each_label[i], | |||
@@ -345,12 +345,12 @@ def wrapper_compute_subtree_kernel(Kmatrix, itr): | |||
def _wl_spkernel_do(Gn, node_label, edge_label, height): | |||
"""Calculate Weisfeiler-Lehman shortest path kernels between graphs. | |||
"""Compute Weisfeiler-Lehman shortest path kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
@@ -413,7 +413,7 @@ def _wl_spkernel_do(Gn, node_label, edge_label, height): | |||
for node in G.nodes(data = True): | |||
node[1][node_label] = set_compressed[set_multisets[node[0]]] | |||
# calculate subtree kernel with h iterations and add it to the final kernel | |||
# Compute subtree kernel with h iterations and add it to the final kernel | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
for e1 in Gn[i].edges(data = True): | |||
@@ -427,12 +427,12 @@ def _wl_spkernel_do(Gn, node_label, edge_label, height): | |||
def _wl_edgekernel_do(Gn, node_label, edge_label, height): | |||
"""Calculate Weisfeiler-Lehman edge kernels between graphs. | |||
"""Compute Weisfeiler-Lehman edge kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
@@ -491,7 +491,7 @@ def _wl_edgekernel_do(Gn, node_label, edge_label, height): | |||
for node in G.nodes(data = True): | |||
node[1][node_label] = set_compressed[set_multisets[node[0]]] | |||
# calculate subtree kernel with h iterations and add it to the final kernel | |||
# Compute subtree kernel with h iterations and add it to the final kernel | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
for e1 in Gn[i].edges(data = True): | |||
@@ -504,12 +504,12 @@ def _wl_edgekernel_do(Gn, node_label, edge_label, height): | |||
def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel): | |||
"""Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs. | |||
"""Compute Weisfeiler-Lehman kernels based on user-defined kernel between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
@@ -564,7 +564,7 @@ def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel): | |||
for node in G.nodes(data = True): | |||
node[1][node_label] = set_compressed[set_multisets[node[0]]] | |||
# calculate kernel with h iterations and add it to the final kernel | |||
# Compute kernel with h iterations and add it to the final kernel | |||
Kmatrix += base_kernel(Gn, node_label, edge_label) | |||
return Kmatrix |
@@ -125,12 +125,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
def __subtree_kernel_do(self, Gn): | |||
"""Calculate Weisfeiler-Lehman kernels between graphs. | |||
"""Compute Weisfeiler-Lehman kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
Return | |||
------ | |||
@@ -152,7 +152,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
# number of occurrences of each label in G
all_num_of_each_label.append(dict(Counter(labels_ori))) | |||
# calculate subtree kernel with the 0th iteration and add it to the final kernel. | |||
# Compute subtree kernel with the 0th iteration and add it to the final kernel. | |||
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) | |||
# iterate each height | |||
@@ -198,7 +198,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
# all_labels_ori.update(labels_comp) | |||
all_num_of_each_label.append(dict(Counter(labels_comp))) | |||
# calculate subtree kernel with h iterations and add it to the final kernel | |||
# Compute subtree kernel with h iterations and add it to the final kernel | |||
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) | |||
return gram_matrix | |||
@@ -244,12 +244,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
def _wl_spkernel_do(Gn, node_label, edge_label, height): | |||
"""Calculate Weisfeiler-Lehman shortest path kernels between graphs. | |||
"""Compute Weisfeiler-Lehman shortest path kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
@@ -312,7 +312,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
for node in G.nodes(data = True): | |||
node[1][node_label] = set_compressed[set_multisets[node[0]]] | |||
# calculate subtree kernel with h iterations and add it to the final kernel | |||
# Compute subtree kernel with h iterations and add it to the final kernel | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
for e1 in Gn[i].edges(data = True): | |||
@@ -326,12 +326,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
def _wl_edgekernel_do(Gn, node_label, edge_label, height): | |||
"""Calculate Weisfeiler-Lehman edge kernels between graphs. | |||
"""Compute Weisfeiler-Lehman edge kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
@@ -390,7 +390,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
for node in G.nodes(data = True): | |||
node[1][node_label] = set_compressed[set_multisets[node[0]]] | |||
# calculate subtree kernel with h iterations and add it to the final kernel | |||
# Compute subtree kernel with h iterations and add it to the final kernel | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
for e1 in Gn[i].edges(data = True): | |||
@@ -403,12 +403,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel): | |||
"""Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs. | |||
"""Compute Weisfeiler-Lehman kernels based on user-defined kernel between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
List of graphs between which the kernels are computed. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
@@ -463,7 +463,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
for node in G.nodes(data = True): | |||
node[1][node_label] = set_compressed[set_multisets[node[0]]] | |||
# calculate kernel with h iterations and add it to the final kernel | |||
# Compute kernel with h iterations and add it to the final kernel | |||
gram_matrix += base_kernel(Gn, node_label, edge_label) | |||
return gram_matrix | |||
@@ -13,6 +13,7 @@ import os | |||
class Dataset(object): | |||
def __init__(self, filename=None, filename_targets=None, **kwargs): | |||
if filename is None: | |||
self.__graphs = None | |||
@@ -180,13 +181,13 @@ class Dataset(object): | |||
# return 0 | |||
def get_dataset_infos(self, keys=None): | |||
def get_dataset_infos(self, keys=None, params=None): | |||
"""Computes and returns the structure and property information of the graph dataset. | |||
Parameters | |||
---------- | |||
keys : list | |||
List of strings which indicate which informations will be returned. The | |||
keys : list, optional | |||
A list of strings which indicate which information will be returned. The
possible choices include:
'substructures': sub-structures graphs contains, including 'linear', 'non | |||
@@ -241,7 +242,15 @@ class Dataset(object): | |||
'class_number': number of classes. Only available for classification problems. | |||
'all_degree_entropy': the entropy of degree distribution of each graph. | |||
'ave_degree_entropy': the average entropy of degree distribution of all graphs. | |||
All information above will be returned if ``keys`` is not given.
params : dict of dict, optional
A dictionary which contains extra parameters for each possible
element in ``keys``. | |||
Return | |||
------ | |||
@@ -276,6 +285,8 @@ class Dataset(object): | |||
'node_attr_dim', | |||
'edge_attr_dim', | |||
'class_number', | |||
'all_degree_entropy', | |||
'ave_degree_entropy' | |||
] | |||
# dataset size | |||
@@ -420,6 +431,22 @@ class Dataset(object): | |||
self.__edge_attr_dim = self.__get_edge_attr_dim() | |||
infos['edge_attr_dim'] = self.__edge_attr_dim | |||
# entropy of degree distribution. | |||
if 'all_degree_entropy' in keys: | |||
if params is not None and ('all_degree_entropy' in params) and ('base' in params['all_degree_entropy']): | |||
base = params['all_degree_entropy']['base'] | |||
else: | |||
base = None | |||
infos['all_degree_entropy'] = self.__compute_all_degree_entropy(base=base) | |||
if 'ave_degree_entropy' in keys: | |||
if params is not None and ('ave_degree_entropy' in params) and ('base' in params['ave_degree_entropy']): | |||
base = params['ave_degree_entropy']['base'] | |||
else: | |||
base = None | |||
infos['ave_degree_entropy'] = np.mean(self.__compute_all_degree_entropy(base=base)) | |||
return infos | |||
@@ -653,8 +680,7 @@ class Dataset(object): | |||
def __get_all_fill_factors(self): | |||
""" | |||
Get fill factor, the number of non-zero entries in the adjacency matrix. | |||
"""Get fill factor, the number of non-zero entries in the adjacency matrix. | |||
Returns | |||
------- | |||
@@ -721,7 +747,30 @@ class Dataset(object): | |||
def __get_edge_attr_dim(self): | |||
return len(self.__edge_attrs) | |||
def __compute_all_degree_entropy(self, base=None): | |||
"""Compute the entropy of degree distribution of each graph. | |||
Parameters | |||
---------- | |||
base : float, optional | |||
The logarithmic base to use. The default is ``e`` (natural logarithm). | |||
Returns | |||
------- | |||
degree_entropy : list
The calculated entropy of the degree distribution of each graph.
""" | |||
from gklearn.utils.stats import entropy | |||
degree_entropy = [] | |||
for g in self.__graphs: | |||
degrees = list(dict(g.degree()).values()) | |||
en = entropy(degrees, base=base) | |||
degree_entropy.append(en) | |||
return degree_entropy | |||
@property | |||
def graphs(self): | |||
@@ -0,0 +1,52 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Oct 7 14:43:36 2020 | |||
@author: ljia | |||
""" | |||
def rounder(x, decimals): | |||
"""Round, where 5 is rounded up. | |||
Parameters | |||
---------- | |||
x : float | |||
The number to be rounded. | |||
decimals : int | |||
Decimals to which ``x`` is rounded.
Returns | |||
------- | |||
string | |||
The rounded number. | |||
""" | |||
    x_strs = str(x).split('.')
    if len(x_strs) == 2:  # x has a fractional part
        before = x_strs[0]
        after = x_strs[1]
        if len(after) > decimals:
            if int(after[decimals]) >= 5:  # the first dropped digit rounds up
                rounded = int(after[0:decimals]) + 1
                if rounded == 10 ** decimals:
                    # The carry propagates into the integer part,
                    # e.g. rounder(0.996, 2) == '1.00'. (Assumes x >= 0.)
                    before = str(int(before) + 1)
                    after = '0' * decimals
                else:
                    # zfill restores the leading zeros that int() drops,
                    # e.g. rounder(1.0075333616, 2) == '1.01'.
                    after = str(rounded).zfill(decimals)
            else:
                after = after[0:decimals]  # the first dropped digit rounds down
        elif len(after) < decimals:
            after += '0' * (decimals - len(after))  # pad to the requested width
        return before + '.' + after
    elif len(x_strs) == 1:  # x is an integer; nothing to round
        return x_strs[0]
if __name__ == '__main__': | |||
x = 1.0075333616 | |||
y = rounder(x, 2) | |||
print(y) |
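For context on why a string-based rounder is useful here: Python's built-in ``round()`` applies round-half-to-even on binary floats, so a trailing 5 can round down, whereas ``rounder`` always rounds it up. A quick comparison (the import path is an assumption)::

    >>> from gklearn.utils.rounder import rounder  # import path assumed
    >>> rounder(2.675, 2)  # the dropped 5 rounds up
    '2.68'
    >>> round(2.675, 2)    # round-half-to-even on the nearest binary float
    2.67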
@@ -63,4 +63,4 @@ def parallel_gm(func, Kmatrix, Gn, init_worker=None, glbv=None, | |||
len_itr = int(len(Gn) * (len(Gn) + 1) / 2) | |||
parallel_me(func, func_assign, Kmatrix, itr, len_itr=len_itr, | |||
init_worker=init_worker, glbv=glbv, method=method, n_jobs=n_jobs, | |||
chunksize=chunksize, itr_desc='calculating kernels', verbose=verbose) | |||
chunksize=chunksize, itr_desc='Computing kernels', verbose=verbose) |
@@ -0,0 +1,27 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Mon Oct 5 15:12:41 2020 | |||
@author: ljia | |||
""" | |||
from collections import Counter | |||
from scipy import stats | |||
def entropy(labels, base=None): | |||
"""Calculate the entropy of a distribution for given list of labels. | |||
Parameters | |||
---------- | |||
labels : list | |||
Given list of labels. | |||
base : float, optional | |||
The logarithmic base to use. The default is ``e`` (natural logarithm). | |||
Returns | |||
------- | |||
float | |||
The calculated entropy. | |||
""" | |||
return stats.entropy(list(Counter(labels).values()), base=base) |
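A quick sanity check of the helper, with values worked out by hand: the degree sequence of a 4-node star graph is one 3 and three 1s, i.e. a (1/4, 3/4) label distribution::

    >>> from gklearn.utils.stats import entropy
    >>> round(entropy([3, 1, 1, 1], base=2), 3)  # -0.25*log2(0.25) - 0.75*log2(0.75)
    0.811
    >>> entropy([1, 1, 1, 1])  # a single repeated label has zero entropy
    0.0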
@@ -565,6 +565,86 @@ def compute_distance_matrix(gram_matrix): | |||
return dis_mat, dis_max, dis_min, dis_mean | |||
# @todo: use it in ShortestPath. | |||
def compute_vertex_kernels(g1, g2, node_kernels, node_labels=[], node_attrs=[]): | |||
"""Compute kernels between each pair of vertices in two graphs. | |||
Parameters | |||
---------- | |||
g1, g2 : NetworkX graph | |||
The kernels between pairs of vertices in these two graphs are computed.
node_kernels : dict | |||
A dictionary of kernel functions for nodes, including 3 items: 'symb' | |||
for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix' | |||
for both labels. The first 2 functions take two node labels as
parameters, and the 'mix' function takes 4 parameters: a symbolic and a
non-symbolic label for each of the two nodes. Each label is in the form
of a 2-D array (n_samples, n_features). Each function returns a number
as the kernel value. Ignored when nodes are unlabeled. This argument
is designed for the conjugate gradient method and fixed-point iterations.
node_labels : list, optional | |||
The list of the name strings of the node labels. The default is []. | |||
node_attrs : list, optional | |||
The list of the name strings of the node attributes. The default is []. | |||
Returns | |||
------- | |||
vk_dict : dict | |||
Vertex kernels keyed by pairs of vertices.
Notes | |||
----- | |||
This function is used by ``gklearn.kernels.FixedPoint`` and
``gklearn.kernels.StructuralSP``. The method is borrowed from FCSP [1].
References | |||
---------- | |||
.. [1] Lifan Xu, Wei Wang, M Alvarez, John Cavazos, and Dongping Zhang. | |||
Parallelization of shortest path graph kernels on multi-core cpus and gpus. | |||
Proceedings of the Programmability Issues for Heterogeneous Multicores | |||
(MultiProg), Vienna, Austria, 2014. | |||
""" | |||
vk_dict = {}  # vertex kernels dict
if len(node_labels) > 0: | |||
# node symb and non-symb labeled
if len(node_attrs) > 0: | |||
kn = node_kernels['mix'] | |||
for n1 in g1.nodes(data=True): | |||
for n2 in g2.nodes(data=True): | |||
n1_labels = [n1[1][nl] for nl in node_labels] | |||
n2_labels = [n2[1][nl] for nl in node_labels] | |||
n1_attrs = [n1[1][na] for na in node_attrs] | |||
n2_attrs = [n2[1][na] for na in node_attrs] | |||
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs) | |||
# node symb labeled | |||
else: | |||
kn = node_kernels['symb'] | |||
for n1 in g1.nodes(data=True): | |||
for n2 in g2.nodes(data=True): | |||
n1_labels = [n1[1][nl] for nl in node_labels] | |||
n2_labels = [n2[1][nl] for nl in node_labels] | |||
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels) | |||
else: | |||
# node non-symb labeled
if len(node_attrs) > 0: | |||
kn = node_kernels['nsymb'] | |||
for n1 in g1.nodes(data=True): | |||
for n2 in g2.nodes(data=True): | |||
n1_attrs = [n1[1][na] for na in node_attrs] | |||
n2_attrs = [n2[1][na] for na in node_attrs] | |||
vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs) | |||
# node unlabeled | |||
else: | |||
pass # @todo: add edge weights. | |||
# for e1 in g1.edges(data=True): | |||
# for e2 in g2.edges(data=True): | |||
# if e1[2]['cost'] == e2[2]['cost']: | |||
# kernel += 1 | |||
# return kernel | |||
return vk_dict | |||
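A small illustration of the 'symb' branch above, with an inline Dirac-style node kernel so that nothing beyond this module is assumed::

    import networkx as nx
    from gklearn.utils.utils import compute_vertex_kernels

    g1 = nx.path_graph(2)
    nx.set_node_attributes(g1, {0: 'C', 1: 'O'}, 'atom')
    g2 = nx.path_graph(2)
    nx.set_node_attributes(g2, {0: 'C', 1: 'C'}, 'atom')

    node_kernels = {'symb': lambda l1, l2: 1.0 if l1 == l2 else 0.0,
                    'nsymb': None, 'mix': None}  # only 'symb' is reached here
    vk = compute_vertex_kernels(g1, g2, node_kernels, node_labels=['atom'])
    assert vk[(0, 0)] == 1.0  # C vs C
    assert vk[(1, 1)] == 0.0  # O vs C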
def dummy_node(): | |||
""" | |||
/*! | |||
@@ -8,7 +8,7 @@ with open('requirements_pypi.txt') as fp: | |||
setuptools.setup( | |||
name="graphkit-learn", | |||
version="0.2.0", | |||
version="0.2.1", | |||
author="Linlin Jia", | |||
author_email="linlin.jia@insa-rouen.fr", | |||
description="A Python library for graph kernels, graph edit distances, and graph pre-images", | |||