
Merge pull request #33 from jajupmochi/v0.2.x

V0.2.x
Branch: master
Author: linlin (GitHub), 4 years ago
Parent commit: 27f2c4427a
43 changed files with 3145 additions and 2173 deletions
  1. +5 -0 crowdin.yml
  2. +2 -3 docs/source/experiments.rst
  3. +22 -23 docs/source/figures/all_ave_gm_times.svg
  4. +740 -714 docs/source/figures/all_test_accuracy.svg
  5. +196 -0 gklearn/experiments/papers/PRL_2020/accuracy_diff_entropy.py
  6. +6 -6 gklearn/experiments/papers/PRL_2020/runtimes_28cores.py
  7. +9 -9 gklearn/experiments/papers/PRL_2020/runtimes_diff_chunksizes.py
  8. +3 -3 gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py
  9. +3 -3 gklearn/experiments/papers/PRL_2020/synthesized_graphs_degrees.py
  10. +3 -3 gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_el.py
  11. +3 -3 gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_nl.py
  12. +3 -3 gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_nodes.py
  13. +122 -1 gklearn/experiments/papers/PRL_2020/utils.py
  14. +5 -2 gklearn/kernels/__init__.py
  15. +12 -12 gklearn/kernels/commonWalkKernel.py
  16. +7 -7 gklearn/kernels/common_walk.py
  17. +322 -0 gklearn/kernels/conjugate_gradient.py
  18. +218 -142 gklearn/kernels/fixed_point.py
  19. +2 -2 gklearn/kernels/graph_kernel.py
  20. +11 -11 gklearn/kernels/marginalized.py
  21. +16 -16 gklearn/kernels/marginalizedKernel.py
  22. +8 -8 gklearn/kernels/path_up_to_h.py
  23. +43 -43 gklearn/kernels/randomWalkKernel.py
  24. +22 -60 gklearn/kernels/random_walk.py
  25. +86 -0 gklearn/kernels/random_walk_meta.py
  26. +3 -3 gklearn/kernels/shortest_path.py
  27. +5 -5 gklearn/kernels/spKernel.py
  28. +23 -23 gklearn/kernels/spectral_decomposition.py
  29. +6 -39 gklearn/kernels/structural_sp.py
  30. +13 -13 gklearn/kernels/structuralspKernel.py
  31. +30 -30 gklearn/kernels/sylvester_equation.py
  32. +8 -8 gklearn/kernels/treelet.py
  33. +8 -8 gklearn/kernels/treeletKernel.py
  34. +17 -17 gklearn/kernels/untilHPathKernel.py
  35. +17 -17 gklearn/kernels/weisfeilerLehmanKernel.py
  36. +13 -13 gklearn/kernels/weisfeiler_lehman.py
  37. +54 -5 gklearn/utils/dataset.py
  38. +52 -0 gklearn/utils/math.py
  39. +918 -916 gklearn/utils/model_selection_precomputed.py
  40. +1 -1 gklearn/utils/parallel.py
  41. +27 -0 gklearn/utils/stats.py
  42. +80 -0 gklearn/utils/utils.py
  43. +1 -1 setup.py

+5 -0 crowdin.yml

@@ -0,0 +1,5 @@
files:
  - source: /**/
    ignore:
      - /datasets/
    translation: /lang/%two_letters_code%/%original_path%/%original_file_name%

+2 -3 docs/source/experiments.rst

@@ -7,15 +7,14 @@ A two-layer nested cross-validation (CV) is applied to select and evaluate models

 The machine used to execute the experiments is a cluster with 28 CPU cores of Intel(R) Xeon(R) E5-2680 v4 @ 2.40GHz, 252GB memory, and 64-bit operating system CentOS Linux release 7.3.1611. All results were run with Python 3.5.2.

-The figure below exhibits accuracies achieved by graph kernels implemented in `graphkit-learn` library. Each row corresponds to a dataset and each column to a graph kernel. Accuracies are in percentage for classification and in terms of errors of boiling points for regression (Alkane and
-Acyclic datasets). Red color indicates a worse result and green a better one. Gray cells with the “inf” marker indicate that the computation of the graph kernel on the dataset is neglected due to much higher consumption of computational resources than other kernels.
+The figure below exhibits accuracies achieved by graph kernels implemented in the `graphkit-learn` library, in terms of regression error (the upper table) and classification rate (the lower table). Red color indicates the worst results and dark green the best ones. Gray cells with the “inf” marker indicate that the computation of the graph kernel on the dataset is omitted due to much higher consumption of computational resources than other kernels.

 .. image:: figures/all_test_accuracy.svg
    :width: 600
    :alt: accuracies

 The figure below displays computational time consumed to compute Gram matrices of each graph
-kernels (in :math:`log10` of seconds) on each dataset. Colors have the same meaning as in the figure above.
+kernel (in :math:`log10` of seconds) on each dataset. Color legends have the same meaning as in the figure above.

 .. image:: figures/all_ave_gm_times.svg
    :width: 600

+22 -23 docs/source/figures/all_ave_gm_times.svg

[Auto-generated SVG diff condensed. The substantive changes: the row label "NCI11" is corrected to "NCI1" (one digit glyph removed), the text-transform coordinates of several row labels shift accordingly (e.g. translate(41.830437 ...) -> translate(48.78825 ...)), sub-pixel rounding updates many other transforms (...937 -> ...938), and the clip-path id is regenerated ("p7be840e85f" -> "p0d8d08ad84").]

+740 -714 docs/source/figures/all_test_accuracy.svg
(file diff suppressed because it is too large)

+196 -0 gklearn/experiments/papers/PRL_2020/accuracy_diff_entropy.py

@@ -0,0 +1,196 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 5 16:08:33 2020

@author: ljia

This script computes the classification accuracy of each graph kernel on
datasets with different entropies of degree distribution.
"""
from utils import Graph_Kernel_List, cross_validate
import numpy as np
import logging

num_nodes = 40
half_num_graphs = 100


def generate_graphs():
#	from gklearn.utils.graph_synthesizer import GraphSynthesizer
#	gsyzer = GraphSynthesizer()
#	graphs = gsyzer.unified_graphs(num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
#	return graphs
	import networkx as nx
	degrees11 = [5] * num_nodes
#	degrees12 = [2] * num_nodes
	degrees12 = [5] * num_nodes
	degrees21 = list(range(1, 11)) * 6
#	degrees22 = [5 * i for i in list(range(1, 11)) * 6]
	degrees22 = list(range(1, 11)) * 6

	# method 1
	graphs11 = [nx.configuration_model(degrees11, create_using=nx.Graph) for i in range(half_num_graphs)]
	graphs12 = [nx.configuration_model(degrees12, create_using=nx.Graph) for i in range(half_num_graphs)]
	for g in graphs11:
		g.remove_edges_from(nx.selfloop_edges(g))
	for g in graphs12:
		g.remove_edges_from(nx.selfloop_edges(g))

#	# method 2: can easily generate isomorphic graphs.
#	graphs11 = [nx.random_regular_graph(2, num_nodes, seed=None) for i in range(half_num_graphs)]
#	graphs12 = [nx.random_regular_graph(10, num_nodes, seed=None) for i in range(half_num_graphs)]

	# Add node labels.
	for g in graphs11:
		for n in g.nodes():
			g.nodes[n]['atom'] = 0
	for g in graphs12:
		for n in g.nodes():
			g.nodes[n]['atom'] = 1

	graphs1 = graphs11 + graphs12

	# method 1: the entropy of the two classes is not the same.
	graphs21 = [nx.configuration_model(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]
	graphs22 = [nx.configuration_model(degrees22, create_using=nx.Graph) for i in range(half_num_graphs)]
	for g in graphs21:
		g.remove_edges_from(nx.selfloop_edges(g))
	for g in graphs22:
		g.remove_edges_from(nx.selfloop_edges(g))

#	# method 2: too slow, and may fail.
#	graphs21 = [nx.random_degree_sequence_graph(degrees21, seed=None, tries=100) for i in range(half_num_graphs)]
#	graphs22 = [nx.random_degree_sequence_graph(degrees22, seed=None, tries=100) for i in range(half_num_graphs)]

#	# method 3: no randomness.
#	graphs21 = [nx.havel_hakimi_graph(degrees21, create_using=None) for i in range(half_num_graphs)]
#	graphs22 = [nx.havel_hakimi_graph(degrees22, create_using=None) for i in range(half_num_graphs)]

#	# method 4:
#	graphs21 = [nx.configuration_model(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]
#	graphs22 = [nx.degree_sequence_tree(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]

#	# method 5: the entropy of the two classes is not the same.
#	graphs21 = [nx.expected_degree_graph(degrees21, seed=None, selfloops=False) for i in range(half_num_graphs)]
#	graphs22 = [nx.expected_degree_graph(degrees22, seed=None, selfloops=False) for i in range(half_num_graphs)]

#	# method 6: seems there is no randomness.
#	graphs21 = [nx.random_powerlaw_tree(num_nodes, gamma=3, seed=None, tries=10000) for i in range(half_num_graphs)]
#	graphs22 = [nx.random_powerlaw_tree(num_nodes, gamma=3, seed=None, tries=10000) for i in range(half_num_graphs)]

	# Add node labels.
	for g in graphs21:
		for n in g.nodes():
			g.nodes[n]['atom'] = 0
	for g in graphs22:
		for n in g.nodes():
			g.nodes[n]['atom'] = 1

	graphs2 = graphs21 + graphs22

#	# check for isomorphism.
#	iso_mat1 = np.zeros((len(graphs1), len(graphs1)))
#	num1 = 0
#	num2 = 0
#	for i in range(len(graphs1)):
#		for j in range(i + 1, len(graphs1)):
#			if nx.is_isomorphic(graphs1[i], graphs1[j]):
#				iso_mat1[i, j] = 1
#				iso_mat1[j, i] = 1
#				num1 += 1
#				print('iso:', num1, ':', i, ',', j)
#			else:
#				num2 += 1
#				print('not iso:', num2, ':', i, ',', j)
#
#	iso_mat2 = np.zeros((len(graphs2), len(graphs2)))
#	num1 = 0
#	num2 = 0
#	for i in range(len(graphs2)):
#		for j in range(i + 1, len(graphs2)):
#			if nx.is_isomorphic(graphs2[i], graphs2[j]):
#				iso_mat2[i, j] = 1
#				iso_mat2[j, i] = 1
#				num1 += 1
#				print('iso:', num1, ':', i, ',', j)
#			else:
#				num2 += 1
#				print('not iso:', num2, ':', i, ',', j)

	return graphs1, graphs2


def get_infos(graph):
	from gklearn.utils import Dataset
	ds = Dataset()
	ds.load_graphs(graph)
	infos = ds.get_dataset_infos(keys=['all_degree_entropy', 'ave_node_degree'])
	infos['ave_degree_entropy'] = np.mean(infos['all_degree_entropy'])
	print(infos['ave_degree_entropy'], ',', infos['ave_node_degree'])
	return infos


def xp_accuracy_diff_entropy():
	# Generate graphs.
	graphs1, graphs2 = generate_graphs()

	# Compute entropy of degree distribution of the generated graphs.
	info11 = get_infos(graphs1[0:half_num_graphs])
	info12 = get_infos(graphs1[half_num_graphs:])
	info21 = get_infos(graphs2[0:half_num_graphs])
	info22 = get_infos(graphs2[half_num_graphs:])

	# Run and save.
	import pickle
	import os
	save_dir = 'outputs/accuracy_diff_entropy/'
	if not os.path.exists(save_dir):
		os.makedirs(save_dir)

	accuracies = {}
	confidences = {}
	for kernel_name in Graph_Kernel_List:
		print()
		print('Kernel:', kernel_name)

		accuracies[kernel_name] = []
		confidences[kernel_name] = []
		for set_i, graphs in enumerate([graphs1, graphs2]):
			print()
			print('Graph set', set_i)

			tmp_graphs = [g.copy() for g in graphs]
			targets = [0] * half_num_graphs + [1] * half_num_graphs

			accuracy = 'error'
			confidence = 'error'
			try:
				accuracy, confidence = cross_validate(tmp_graphs, targets, kernel_name, ds_name=str(set_i), output_dir=save_dir) #, n_jobs=1)
			except Exception as exp:
				print('An exception occurred when running this experiment:')
				LOG_FILENAME = save_dir + 'error.txt'
				logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
				logging.exception('\n' + kernel_name + ', ' + str(set_i) + ':')
				print(repr(exp))

			accuracies[kernel_name].append(accuracy)
			confidences[kernel_name].append(confidence)

			pickle.dump(accuracy, open(save_dir + 'accuracy.' + kernel_name + '.' + str(set_i) + '.pkl', 'wb'))
			pickle.dump(confidence, open(save_dir + 'confidence.' + kernel_name + '.' + str(set_i) + '.pkl', 'wb'))

	# Save all.
	pickle.dump(accuracies, open(save_dir + 'accuracies.pkl', 'wb'))
	pickle.dump(confidences, open(save_dir + 'confidences.pkl', 'wb'))

	return


if __name__ == '__main__':
	xp_accuracy_diff_entropy()
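The 'all_degree_entropy' key queried in get_infos above presumably measures the Shannon entropy of each graph's degree distribution; a rough standalone sketch of that quantity (my own helper, not the gklearn Dataset API):

import networkx as nx
import numpy as np

def degree_distribution_entropy(g):
	# Shannon entropy (base 2) of the empirical degree distribution of g.
	degrees = [d for _, d in g.degree()]
	_, counts = np.unique(degrees, return_counts=True)
	p = counts / counts.sum()
	return -np.sum(p * np.log2(p))

# A 5-regular degree sequence concentrates on one value (entropy near 0), while
# degrees drawn from range(1, 11) spread over ten values (entropy near log2(10));
# parallel edges collapse under create_using=nx.Graph, so degrees can deviate slightly.
g_low = nx.configuration_model([5] * 40, create_using=nx.Graph)
g_high = nx.configuration_model(list(range(1, 11)) * 4, create_using=nx.Graph)
print(degree_distribution_entropy(g_low), degree_distribution_entropy(g_high))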

+6 -6 gklearn/experiments/papers/PRL_2020/runtimes_28cores.py

@@ -21,14 +21,14 @@ def xp_runtimes_of_all_28cores():

 	run_times = {}
-	for kernel_name in Graph_Kernel_List:
+	for ds_name in Dataset_List:
 		print()
-		print('Kernel:', kernel_name)
-		run_times[kernel_name] = []
-		for ds_name in Dataset_List:
+		print('Dataset:', ds_name)
+		run_times[ds_name] = []
+		for kernel_name in Graph_Kernel_List:
 			print()
-			print('Dataset:', ds_name)
+			print('Kernel:', kernel_name)

 			# get graphs.
 			graphs, _ = load_predefined_dataset(ds_name)

@@ -43,7 +43,7 @@ def xp_runtimes_of_all_28cores():
 				logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
 				logging.exception('')
 				print(repr(exp))
-			run_times[kernel_name].append(run_time)
+			run_times[ds_name].append(run_time)
 			pickle.dump(run_time, open(save_dir + 'run_time.' + kernel_name + '.' + ds_name + '.pkl', 'wb'))


+9 -9 gklearn/experiments/papers/PRL_2020/runtimes_diff_chunksizes.py

@@ -20,17 +20,17 @@ def xp_runtimes_diff_chunksizes():
 		os.makedirs(save_dir)

 	run_times = {}
-	for kernel_name in Graph_Kernel_List:
+	for ds_name in Dataset_List:
 		print()
-		print('Kernel:', kernel_name)
-		run_times[kernel_name] = []
-		for ds_name in Dataset_List:
+		print('Dataset:', ds_name)
+		run_times[ds_name] = []
+		for kernel_name in Graph_Kernel_List:
 			print()
-			print('Dataset:', ds_name)
-			run_times[kernel_name].append([])
+			print('Kernel:', kernel_name)
+			run_times[ds_name].append([])
 			for chunksize in [1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000]:
 				print()
 				print('Chunksize:', chunksize)

@@ -48,7 +48,7 @@ def xp_runtimes_diff_chunksizes():
 					logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
 					logging.exception('')
 					print(repr(exp))
-				run_times[kernel_name][-1].append(run_time)
+				run_times[ds_name][-1].append(run_time)
 				pickle.dump(run_time, open(save_dir + 'run_time.' + kernel_name + '.' + ds_name + '.' + str(chunksize) + '.pkl', 'wb'))


+3 -3 gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py

@@ -16,7 +16,7 @@ def generate_graphs():
 	return graphs


-def xp_synthesied_graphs_dataset_size():
+def xp_synthesized_graphs_dataset_size():

 	# Generate graphs.
 	graphs = generate_graphs()

@@ -43,7 +43,7 @@ def xp_synthesied_graphs_dataset_size():
 			run_time = 'error'
 			try:
-				gram_matrix, run_time = compute_graph_kernel(sub_graphs, kernel_name, n_jobs=1)
+				gram_matrix, run_time = compute_graph_kernel(sub_graphs, kernel_name)
 			except Exception as exp:
 				print('An exception occured when running this experiment:')
 				LOG_FILENAME = save_dir + 'error.txt'

@@ -61,4 +61,4 @@ def xp_synthesied_graphs_dataset_size():


 if __name__ == '__main__':
-	xp_synthesied_graphs_dataset_size()
+	xp_synthesized_graphs_dataset_size()

+3 -3 gklearn/experiments/papers/PRL_2020/synthesized_graphs_degrees.py

@@ -16,7 +16,7 @@ def generate_graphs(degree):
 	return graphs


-def xp_synthesied_graphs_degrees():
+def xp_synthesized_graphs_degrees():

 	# Run and save.
 	import pickle

@@ -42,7 +42,7 @@ def xp_synthesied_graphs_degrees():
 			# Compute Gram matrix.
 			run_time = 'error'
 			try:
-				gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name, n_jobs=1)
+				gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name)
 			except Exception as exp:
 				print('An exception occured when running this experiment:')
 				LOG_FILENAME = save_dir + 'error.txt'

@@ -60,4 +60,4 @@ def xp_synthesied_graphs_degrees():


 if __name__ == '__main__':
-	xp_synthesied_graphs_degrees()
+	xp_synthesized_graphs_degrees()

+3 -3 gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_el.py

@@ -16,7 +16,7 @@ def generate_graphs(num_el_alp):
 	return graphs


-def xp_synthesied_graphs_num_edge_label_alphabet():
+def xp_synthesized_graphs_num_edge_label_alphabet():

 	# Run and save.
 	import pickle

@@ -42,7 +42,7 @@ def xp_synthesied_graphs_num_edge_label_alphabet():
 			# Compute Gram matrix.
 			run_time = 'error'
 			try:
-				gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name, n_jobs=1)
+				gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name)
 			except Exception as exp:
 				print('An exception occured when running this experiment:')
 				LOG_FILENAME = save_dir + 'error.txt'

@@ -60,4 +60,4 @@ def xp_synthesied_graphs_num_edge_label_alphabet():


 if __name__ == '__main__':
-	xp_synthesied_graphs_num_edge_label_alphabet()
+	xp_synthesized_graphs_num_edge_label_alphabet()

+3 -3 gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_nl.py

@@ -16,7 +16,7 @@ def generate_graphs(num_nl_alp):
 	return graphs


-def xp_synthesied_graphs_num_node_label_alphabet():
+def xp_synthesized_graphs_num_node_label_alphabet():

 	# Run and save.
 	import pickle

@@ -42,7 +42,7 @@ def xp_synthesied_graphs_num_node_label_alphabet():
 			# Compute Gram matrix.
 			run_time = 'error'
 			try:
-				gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name, n_jobs=1)
+				gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name)
 			except Exception as exp:
 				run_times[kernel_name].append('error')
 				print('An exception occured when running this experiment:')

@@ -61,4 +61,4 @@ def xp_synthesied_graphs_num_node_label_alphabet():


 if __name__ == '__main__':
-	xp_synthesied_graphs_num_node_label_alphabet()
+	xp_synthesized_graphs_num_node_label_alphabet()

+3 -3 gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_nodes.py

@@ -16,7 +16,7 @@ def generate_graphs(num_nodes):
 	return graphs


-def xp_synthesied_graphs_num_nodes():
+def xp_synthesized_graphs_num_nodes():

 	# Run and save.
 	import pickle

@@ -42,7 +42,7 @@ def xp_synthesied_graphs_num_nodes():
 			# Compute Gram matrix.
 			run_time = 'error'
 			try:
-				gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name, n_jobs=1)
+				gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name)
 			except Exception as exp:
 				run_times[kernel_name].append('error')
 				print('An exception occured when running this experiment:')

@@ -61,4 +61,4 @@ def xp_synthesied_graphs_num_nodes():


 if __name__ == '__main__':
-	xp_synthesied_graphs_num_nodes()
+	xp_synthesized_graphs_num_nodes()

+122 -1 gklearn/experiments/papers/PRL_2020/utils.py

@@ -6,6 +6,8 @@ Created on Tue Sep 22 11:33:28 2020
 @author: ljia
 """
 import multiprocessing
+import numpy as np
+from gklearn.utils import model_selection_for_precomputed_kernel


 Graph_Kernel_List = ['PathUpToH', 'WLSubtree', 'SylvesterEquation', 'Marginalized', 'ShortestPath', 'Treelet', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'StructuralSP', 'CommonWalk']

@@ -60,7 +62,7 @@ def compute_graph_kernel(graphs, kernel_name, n_jobs=multiprocessing.cpu_count()):
 		import functools
 		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
 		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
-		params = {'compute_method': 'fp', 'weight': 1e-3, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
+		params = {'compute_method': 'fp', 'weight': 1e-4, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
 	elif kernel_name == 'SpectralDecomposition':
 		from gklearn.kernels.randomWalkKernel import randomwalkkernel

@@ -109,4 +111,123 @@ def compute_graph_kernel(graphs, kernel_name, n_jobs=multiprocessing.cpu_count()):
 	params['verbose'] = True
 	results = estimator(graphs, **params)
 	return results[0], results[1]
+
+
+def cross_validate(graphs, targets, kernel_name, output_dir='outputs/', ds_name='synthesized', n_jobs=multiprocessing.cpu_count()):
+
+	param_grid = None
+
+	if kernel_name == 'CommonWalk':
+		from gklearn.kernels.commonWalkKernel import commonwalkkernel
+		estimator = commonwalkkernel
+		param_grid_precomputed = [{'compute_method': ['geo'],
+								   'weight': np.linspace(0.01, 0.15, 15)}]
+
+	elif kernel_name == 'Marginalized':
+		from gklearn.kernels.marginalizedKernel import marginalizedkernel
+		estimator = marginalizedkernel
+		param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.9, 9),
+								  'n_iteration': np.linspace(1, 19, 7),
+								  'remove_totters': [False]}
+
+	elif kernel_name == 'SylvesterEquation':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		param_grid_precomputed = {'compute_method': ['sylvester'],
+#								  'weight': np.linspace(0.01, 0.10, 10)}
+								  'weight': np.logspace(-1, -10, num=10, base=10)}
+
+	elif kernel_name == 'ConjugateGradient':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		param_grid_precomputed = {'compute_method': ['conjugate'],
+								  'node_kernels': [sub_kernel], 'edge_kernels': [sub_kernel],
+								  'weight': np.logspace(-1, -10, num=10, base=10)}
+
+	elif kernel_name == 'FixedPoint':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		param_grid_precomputed = {'compute_method': ['fp'],
+								  'node_kernels': [sub_kernel], 'edge_kernels': [sub_kernel],
+								  'weight': np.logspace(-3, -10, num=8, base=10)}
+
+	elif kernel_name == 'SpectralDecomposition':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		param_grid_precomputed = {'compute_method': ['spectral'],
+								  'weight': np.logspace(-1, -10, num=10, base=10),
+								  'sub_kernel': ['geo', 'exp']}
+
+	elif kernel_name == 'ShortestPath':
+		from gklearn.kernels.spKernel import spkernel
+		estimator = spkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		param_grid_precomputed = {'node_kernels': [sub_kernel]}
+
+	elif kernel_name == 'StructuralSP':
+		from gklearn.kernels.structuralspKernel import structuralspkernel
+		estimator = structuralspkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		param_grid_precomputed = {'node_kernels': [sub_kernel], 'edge_kernels': [sub_kernel],
+								  'compute_method': ['naive']}
+
+	elif kernel_name == 'PathUpToH':
+		from gklearn.kernels.untilHPathKernel import untilhpathkernel
+		estimator = untilhpathkernel
+		param_grid_precomputed = {'depth': np.linspace(1, 10, 10), # [2],
+								  'k_func': ['MinMax', 'tanimoto'], # ['MinMax'], #
+								  'compute_method': ['trie']} # ['MinMax']}
+
+	elif kernel_name == 'Treelet':
+		from gklearn.kernels.treeletKernel import treeletkernel
+		estimator = treeletkernel
+		from gklearn.utils.kernels import gaussiankernel, polynomialkernel
+		import functools
+		gkernels = [functools.partial(gaussiankernel, gamma=1 / ga)
+#					for ga in np.linspace(1, 10, 10)]
+					for ga in np.logspace(0, 10, num=11, base=10)]
+		pkernels = [functools.partial(polynomialkernel, d=d, c=c) for d in range(1, 11)
+					for c in np.logspace(0, 10, num=11, base=10)]
+		param_grid_precomputed = {'sub_kernel': pkernels + gkernels}
+
+	elif kernel_name == 'WLSubtree':
+		from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
+		estimator = weisfeilerlehmankernel
+		param_grid_precomputed = {'base_kernel': ['subtree'],
+								  'height': np.linspace(0, 10, 11)}
+		param_grid = {'C': np.logspace(-10, 4, num=29, base=10)}

+	if param_grid is None:
+		param_grid = {'C': np.logspace(-10, 10, num=41, base=10)}
+
+	results = model_selection_for_precomputed_kernel(
+		graphs,
+		estimator,
+		param_grid_precomputed,
+		param_grid,
+		'classification',
+		NUM_TRIALS=28,
+		datafile_y=targets,
+		extra_params=None,
+		ds_name=ds_name,
+		output_dir=output_dir,
+		n_jobs=n_jobs,
+		read_gm_from_file=False,
+		verbose=True)
+	return results[0], results[1]
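A hypothetical call to the new cross_validate helper, mirroring how accuracy_diff_entropy.py uses it (the toy graphs and targets are illustrative only):

import networkx as nx
from utils import cross_validate

# Two toy classes of node-labeled graphs.
graphs = [nx.path_graph(6) for _ in range(20)] + [nx.cycle_graph(6) for _ in range(20)]
for g in graphs:
	for n in g.nodes():
		g.nodes[n]['atom'] = 0  # the kernels above expect an 'atom' node label
targets = [0] * 20 + [1] * 20

# Grid-searches the kernel parameters and C over 28 trials, then returns the
# mean test accuracy and its confidence interval.
accuracy, confidence = cross_validate(graphs, targets, 'PathUpToH', ds_name='toy')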

+5 -2 gklearn/kernels/__init__.py

@@ -1,5 +1,5 @@
 # -*-coding:utf-8 -*-
-"""gklearn - kernels module
+"""gklearn - graph kernels module
 """


 # info

@@ -10,9 +10,12 @@ __date__ = "November 2018"
 from gklearn.kernels.graph_kernel import GraphKernel
 from gklearn.kernels.common_walk import CommonWalk
 from gklearn.kernels.marginalized import Marginalized
+from gklearn.kernels.random_walk import RandomWalk
+from gklearn.kernels.random_walk_meta import RandomWalkMeta
 from gklearn.kernels.sylvester_equation import SylvesterEquation
+from gklearn.kernels.conjugate_gradient import ConjugateGradient
+from gklearn.kernels.fixed_point import FixedPoint
 from gklearn.kernels.spectral_decomposition import SpectralDecomposition
-from gklearn.kernels.random_walk import RandomWalk
 from gklearn.kernels.shortest_path import ShortestPath
 from gklearn.kernels.structural_sp import StructuralSP
 from gklearn.kernels.path_up_to_h import PathUpToH
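A sketch of how the newly exported classes might be instantiated; the constructor arguments are inferred from the ConjugateGradient source further down in this diff, and the compute call follows the GraphKernel API used elsewhere in the library (treat both as assumptions):

import functools
import networkx as nx
from gklearn.kernels import ConjugateGradient
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct

# Toy input: two node-labeled graphs.
graphs = [nx.path_graph(4), nx.cycle_graph(4)]
for g in graphs:
	for n in g.nodes():
		g.nodes[n]['atom'] = 0

mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}

kernel = ConjugateGradient(node_labels=['atom'], ds_infos={'directed': False}, weight=1e-3,
						   node_kernels=sub_kernel, edge_kernels=sub_kernel)
gram_matrix, run_time = kernel.compute(graphs, parallel=None, verbose=0)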


+12 -12 gklearn/kernels/commonWalkKernel.py

@@ -30,15 +30,15 @@ def commonwalkkernel(*args,
 					n_jobs=None,
 					chunksize=None,
 					verbose=True):
-	"""Calculate common walk graph kernels between graphs.
+	"""Compute common walk graph kernels between graphs.

 	Parameters
 	----------
 	Gn : List of NetworkX graph
-		List of graphs between which the kernels are calculated.
+		List of graphs between which the kernels are computed.

 	G1, G2 : NetworkX graphs
-		Two graphs between which the kernel is calculated.
+		Two graphs between which the kernel is computed.

 	node_label : string
 		Node attribute used as symbolic label. The default node label is 'atom'.
 	edge_label : string

@@ -133,7 +133,7 @@ def commonwalkkernel(*args,
 #
 #	for i, j, kernel in tqdm(
 #			pool.imap_unordered(do_partial, itr, chunksize),
-#			desc='calculating kernels',
+#			desc='computing kernels',
 #			file=sys.stdout):
 #		Kmatrix[i][j] = kernel
 #		Kmatrix[j][i] = kernel

@@ -145,14 +145,14 @@ def commonwalkkernel(*args,
 #	# direct product graph method - exponential
 #	itr = combinations_with_replacement(range(0, len(Gn)), 2)
 #	if compute_method == 'exp':
-#		for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
+#		for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout):
 #			Kmatrix[i][j] = _commonwalkkernel_exp(Gn[i], Gn[j], node_label,
 #												  edge_label, weight)
 #			Kmatrix[j][i] = Kmatrix[i][j]
 #
 #	# direct product graph method - geometric
 #	elif compute_method == 'geo':
-#		for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
+#		for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout):
 #			Kmatrix[i][j] = _commonwalkkernel_geo(Gn[i], Gn[j], node_label,
 #												  edge_label, weight)
 #			Kmatrix[j][i] = Kmatrix[i][j]

@@ -161,7 +161,7 @@ def commonwalkkernel(*args,
 #	# search all paths use brute force.
 #	elif compute_method == 'brute':
 #		n = int(n)
-#		# get all paths of all graphs before calculating kernels to save time, but this may cost a lot of memory for large dataset.
+#		# get all paths of all graphs before computing kernels to save time, but this may cost a lot of memory for large dataset.
 #		all_walks = [
 #			find_all_walks_until_length(Gn[i], n, node_label, edge_label)
 #			for i in range(0, len(Gn))

@@ -185,13 +185,13 @@ def commonwalkkernel(*args,


 def _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta):
-	"""Calculate walk graph kernels up to n between 2 graphs using exponential
+	"""Compute walk graph kernels up to n between 2 graphs using exponential
 	series.

 	Parameters
 	----------
 	Gn : List of NetworkX graph
-		List of graphs between which the kernels are calculated.
+		List of graphs between which the kernels are computed.
 	node_label : string
 		Node attribute used as label.
 	edge_label : string

@@ -259,13 +259,13 @@ def wrapper_cw_exp(node_label, edge_label, beta, itr):


 def _commonwalkkernel_geo(g1, g2, node_label, edge_label, gamma):
-	"""Calculate common walk graph kernels up to n between 2 graphs using
+	"""Compute common walk graph kernels up to n between 2 graphs using
 	geometric series.

 	Parameters
 	----------
 	Gn : List of NetworkX graph
-		List of graphs between which the kernels are calculated.
+		List of graphs between which the kernels are computed.
 	node_label : string
 		Node attribute used as label.
 	edge_label : string

@@ -304,7 +304,7 @@ def _commonwalkkernel_brute(walks1,
 						   node_label='atom',
 						   edge_label='bond_type',
 						   labeled=True):
-	"""Calculate walk graph kernels up to n between 2 graphs.
+	"""Compute walk graph kernels up to n between 2 graphs.

 	Parameters
 	----------
---------- ----------


+7 -7 gklearn/kernels/common_walk.py

@@ -46,7 +46,7 @@ class CommonWalk(GraphKernel):
 		from itertools import combinations_with_replacement
 		itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
 		if self._verbose >= 2:
-			iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
+			iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
 		else:
 			iterator = itr

@@ -102,7 +102,7 @@ class CommonWalk(GraphKernel):
 		# compute kernel list.
 		kernel_list = [None] * len(g_list)
 		if self._verbose >= 2:
-			iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
+			iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
 		else:
 			iterator = range(len(g_list))

@@ -148,7 +148,7 @@ class CommonWalk(GraphKernel):
 		len_itr = len(g_list)
 		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
 					init_worker=_init_worker_list, glbv=(g1, g_list), method='imap_unordered',
-					n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
+					n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)

 		return kernel_list

@@ -179,13 +179,13 @@ class CommonWalk(GraphKernel):

 	def __kernel_do_exp(self, g1, g2, beta):
-		"""Calculate common walk graph kernel between 2 graphs using exponential
+		"""Compute common walk graph kernel between 2 graphs using exponential
 		series.

 		Parameters
 		----------
 		g1, g2 : NetworkX graphs
-			Graphs between which the kernels are calculated.
+			Graphs between which the kernels are computed.
 		beta : integer
 			Weight.

@@ -231,13 +231,13 @@ class CommonWalk(GraphKernel):

 	def __kernel_do_geo(self, g1, g2, gamma):
-		"""Calculate common walk graph kernel between 2 graphs using geometric
+		"""Compute common walk graph kernel between 2 graphs using geometric
 		series.

 		Parameters
 		----------
 		g1, g2 : NetworkX graphs
-			Graphs between which the kernels are calculated.
+			Graphs between which the kernels are computed.
 		gamma : integer
 			Weight.


+322 -0 gklearn/kernels/conjugate_gradient.py

@@ -0,0 +1,322 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 20 16:09:51 2020

@author: ljia

@references:

	[1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
"""

import sys
from tqdm import tqdm
import numpy as np
import networkx as nx
from scipy.sparse import identity
from scipy.sparse.linalg import cg
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.kernels import RandomWalkMeta
from gklearn.utils.utils import compute_vertex_kernels


class ConjugateGradient(RandomWalkMeta):

	def __init__(self, **kwargs):
		super().__init__(**kwargs)
		self._node_kernels = kwargs.get('node_kernels', None)
		self._edge_kernels = kwargs.get('edge_kernels', None)
		self._node_labels = kwargs.get('node_labels', [])
		self._edge_labels = kwargs.get('edge_labels', [])
		self._node_attrs = kwargs.get('node_attrs', [])
		self._edge_attrs = kwargs.get('edge_attrs', [])


	def _compute_gm_series(self):
		self._check_edge_weight(self._graphs, self._verbose)
		self._check_graphs(self._graphs)

		lmda = self._weight

		# Compute Gram matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		# Reindex nodes using consecutive integers for the convenience of kernel computation.
		if self._verbose >= 2:
			iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout)
		else:
			iterator = self._graphs
		self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]

		if self._p is None and self._q is None: # p and q are uniform distributions as default.
			from itertools import combinations_with_replacement
			itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
			if self._verbose >= 2:
				iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
			else:
				iterator = itr

			for i, j in iterator:
				kernel = self.__kernel_do(self._graphs[i], self._graphs[j], lmda)
				gram_matrix[i][j] = kernel
				gram_matrix[j][i] = kernel

		else: # @todo
			pass

		return gram_matrix


	def _compute_gm_imap_unordered(self):
		self._check_edge_weight(self._graphs, self._verbose)
		self._check_graphs(self._graphs)

		# Compute Gram matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		# @todo: parallel this.
		# Reindex nodes using consecutive integers for the convenience of kernel computation.
		if self._verbose >= 2:
			iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout)
		else:
			iterator = self._graphs
		self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]

		if self._p is None and self._q is None: # p and q are uniform distributions as default.

			def init_worker(gn_toshare):
				global G_gn
				G_gn = gn_toshare

			do_fun = self._wrapper_kernel_do
			parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
						glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)

		else: # @todo
			pass

		return gram_matrix


	def _compute_kernel_list_series(self, g1, g_list):
		self._check_edge_weight(g_list + [g1], self._verbose)
		self._check_graphs(g_list + [g1])

		lmda = self._weight

		# compute kernel list.
		kernel_list = [None] * len(g_list)

		# Reindex nodes using consecutive integers for the convenience of kernel computation.
		g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
		if self._verbose >= 2:
			iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout)
		else:
			iterator = g_list
		g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]

		if self._p is None and self._q is None: # p and q are uniform distributions as default.
			if self._verbose >= 2:
				iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
			else:
				iterator = range(len(g_list))

			for i in iterator:
				kernel = self.__kernel_do(g1, g_list[i], lmda)
				kernel_list[i] = kernel

		else: # @todo
			pass

		return kernel_list


	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		self._check_edge_weight(g_list + [g1], self._verbose)
		self._check_graphs(g_list + [g1])

		# compute kernel list.
		kernel_list = [None] * len(g_list)

		# Reindex nodes using consecutive integers for the convenience of kernel computation.
		g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
		# @todo: parallel this.
		if self._verbose >= 2:
			iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout)
		else:
			iterator = g_list
		g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]

		if self._p is None and self._q is None: # p and q are uniform distributions as default.

			def init_worker(g1_toshare, g_list_toshare):
				global G_g1, G_g_list
				G_g1 = g1_toshare
				G_g_list = g_list_toshare

			do_fun = self._wrapper_kernel_list_do

			def func_assign(result, var_to_assign):
				var_to_assign[result[0]] = result[1]

			itr = range(len(g_list))
			len_itr = len(g_list)
			parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
						init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
						n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)

		else: # @todo
			pass

		return kernel_list


	def _wrapper_kernel_list_do(self, itr):
		return itr, self.__kernel_do(G_g1, G_g_list[itr], self._weight)


	def _compute_single_kernel_series(self, g1, g2):
		self._check_edge_weight([g1] + [g2], self._verbose)
		self._check_graphs([g1] + [g2])

		lmda = self._weight

		# Reindex nodes using consecutive integers for the convenience of kernel computation.
		g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
		g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal')

		if self._p is None and self._q is None: # p and q are uniform distributions as default.
			kernel = self.__kernel_do(g1, g2, lmda)

		else: # @todo
			pass

		return kernel


	def __kernel_do(self, g1, g2, lmda):
		# First, compute kernels between all pairs of nodes using the method
		# borrowed from FCSP. It is faster than directly computing all edge kernels
		# when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the
		# graphs compared, which is the most common case. For very sparse graphs,
		# this would be slow.
		vk_dict = self._compute_vertex_kernels(g1, g2)

		# Compute the weight matrix of the direct product graph.
		w_times, w_dim = self._compute_weight_matrix(g1, g2, vk_dict)
		# use uniform distribution if there is no prior knowledge.
		p_times_uni = 1 / w_dim
		A = identity(w_times.shape[0]) - w_times * lmda
		b = np.full((w_dim, 1), p_times_uni)
		x, _ = cg(A, b)
		# use uniform distribution if there is no prior knowledge.
		q_times = np.full((1, w_dim), p_times_uni)
		return np.dot(q_times, x)


	def _wrapper_kernel_do(self, itr):
		i = itr[0]
		j = itr[1]
		return i, j, self.__kernel_do(G_gn[i], G_gn[j], self._weight)


	def _func_fp(x, p_times, lmda, w_times):
		# The fixed-point map x <- p + lambda * W x (kept from the fixed-point variant; unused here).
		return p_times + lmda * np.dot(w_times, x)


	def _compute_vertex_kernels(self, g1, g2):
		"""Compute vertex kernels between vertices of two graphs.
		"""
		return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)


	# @todo: move if out to make it faster.
	# @todo: node/edge kernels use direct function rather than dicts.
	def _compute_weight_matrix(self, g1, g2, vk_dict):
		"""Compute the weight matrix of the direct product graph.
		"""
		# Define edge kernels.
		def compute_ek_11(e1, e2, ke):
			e1_labels = [e1[2][el] for el in self._edge_labels]
			e2_labels = [e2[2][el] for el in self._edge_labels]
			e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
			e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
			return ke(e1_labels, e2_labels, e1_attrs, e2_attrs)

		def compute_ek_10(e1, e2, ke):
			e1_labels = [e1[2][el] for el in self._edge_labels]
			e2_labels = [e2[2][el] for el in self._edge_labels]
			return ke(e1_labels, e2_labels)

		def compute_ek_01(e1, e2, ke):
			e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
			e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
			return ke(e1_attrs, e2_attrs)

		def compute_ek_00(e1, e2, ke):
			return 1

		# Select the proper edge kernel.
		if len(self._edge_labels) > 0:
			# edge symb and non-symb labeled
			if len(self._edge_attrs) > 0:
				ke = self._edge_kernels['mix']
				ek_temp = compute_ek_11
			# edge symb labeled
			else:
				ke = self._edge_kernels['symb']
				ek_temp = compute_ek_10
		else:
			# edge non-symb labeled
			if len(self._edge_attrs) > 0:
				ke = self._edge_kernels['nsymb']
				ek_temp = compute_ek_01
			# edge unlabeled
			else:
				ke = None
				ek_temp = compute_ek_00 # @todo: check how much slower is this.

		# Compute the weight matrix.
		w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
		w_times = np.zeros((w_dim, w_dim))

		if vk_dict: # node labeled
			if self._ds_infos['directed']:
				for e1 in g1.edges(data=True):
					for e2 in g2.edges(data=True):
						w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
						w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])]
			else: # undirected
				for e1 in g1.edges(data=True):
					for e2 in g2.edges(data=True):
						w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
						w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])] + vk_dict[(e1[0], e2[1])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[0])]
						w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
						w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0])
						w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
						w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
		else: # node unlabeled
			if self._ds_infos['directed']:
				for e1 in g1.edges(data=True):
					for e2 in g2.edges(data=True):
						w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
						w_times[w_idx] = ek_temp(e1, e2, ke)
			else: # undirected
				for e1 in g1.edges(data=True):
					for e2 in g2.edges(data=True):
						w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
						w_times[w_idx] = ek_temp(e1, e2, ke)
						w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
						w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0])
						w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
						w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]

		return w_times, w_dim
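The core of __kernel_do above is the generalized random-walk kernel of reference [1] reduced to a single linear solve: with uniform start and stop distributions p and q over the w_dim vertices of the direct product graph, the kernel value is q^T x where (I - lambda*W)x = p. A standalone toy sketch of exactly that step (the 4x4 weight matrix is made up):

import numpy as np
from scipy.sparse import identity
from scipy.sparse.linalg import cg

# Made-up weight matrix of a tiny direct product graph.
w_times = np.array([[0., 1., 0., 0.],
					[1., 0., 1., 0.],
					[0., 1., 0., 1.],
					[0., 0., 1., 0.]])
w_dim = w_times.shape[0]
lmda = 1e-3  # decay weight; small enough that I - lmda*W stays well-conditioned

p_times = np.full((w_dim, 1), 1 / w_dim)  # uniform start distribution
A = identity(w_dim) - lmda * w_times      # system matrix (symmetric positive definite here)
x, info = cg(A, p_times)                  # conjugate-gradient solve of A x = p
q_times = np.full((1, w_dim), 1 / w_dim)  # uniform stop distribution
kernel_value = float(np.dot(q_times, x))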

+218 -142 gklearn/kernels/fixed_point.py

@@ -14,61 +14,56 @@ import sys
from tqdm import tqdm
import numpy as np
import networkx as nx
-from control import dlyap
+from scipy import optimize
from gklearn.utils.parallel import parallel_gm, parallel_me
-from gklearn.kernels import RandomWalk
+from gklearn.kernels import RandomWalkMeta
+from gklearn.utils.utils import compute_vertex_kernels


-class FixedPoint(RandomWalk):
+class FixedPoint(RandomWalkMeta):

    def __init__(self, **kwargs):
-        RandomWalk.__init__(self, **kwargs)
+        super().__init__(**kwargs)
+        self._node_kernels = kwargs.get('node_kernels', None)
+        self._edge_kernels = kwargs.get('edge_kernels', None)
+        self._node_labels = kwargs.get('node_labels', [])
+        self._edge_labels = kwargs.get('edge_labels', [])
+        self._node_attrs = kwargs.get('node_attrs', [])
+        self._edge_attrs = kwargs.get('edge_attrs', [])


    def _compute_gm_series(self):
-        self._check_edge_weight(self._graphs)
+        self._check_edge_weight(self._graphs, self._verbose)
        self._check_graphs(self._graphs)
+        if self._verbose >= 2:
+            import warnings
+            warnings.warn('All labels are ignored.')
        lmda = self._weight
-        # compute Gram matrix.
+        # Compute Gram matrix.
        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
-        if self._q == None:
-            # don't normalize adjacency matrices if q is a uniform vector. Note
-            # A_wave_list actually contains the transposes of the adjacency matrices.
-            if self._verbose >= 2:
-                iterator = tqdm(self._graphs, desc='compute adjacency matrices', file=sys.stdout)
-            else:
-                iterator = self._graphs
-            A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator]
-#            # normalized adjacency matrices
-#            A_wave_list = []
-#            for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout):
-#                A_tilde = nx.adjacency_matrix(G, eweight).todense().transpose()
-#                norm = A_tilde.sum(axis=0)
-#                norm[norm == 0] = 1
-#                A_wave_list.append(A_tilde / norm)
-            if self._p == None:  # p is uniform distribution as default.
-                from itertools import combinations_with_replacement
-                itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
-                if self._verbose >= 2:
-                    iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
-                else:
-                    iterator = itr
-                for i, j in iterator:
-                    kernel = self.__kernel_do(A_wave_list[i], A_wave_list[j], lmda)
-                    gram_matrix[i][j] = kernel
-                    gram_matrix[j][i] = kernel
-            else:  # @todo
-                pass
+        # Reindex nodes using consecutive integers for the convenience of kernel computation.
+        if self._verbose >= 2:
+            iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout)
+        else:
+            iterator = self._graphs
+        self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
+        if self._p is None and self._q is None:  # p and q are uniform distributions as default.
+            from itertools import combinations_with_replacement
+            itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
+            if self._verbose >= 2:
+                iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
+            else:
+                iterator = itr
+            for i, j in iterator:
+                kernel = self.__kernel_do(self._graphs[i], self._graphs[j], lmda)
+                gram_matrix[i][j] = kernel
+                gram_matrix[j][i] = kernel
        else:  # @todo
            pass
@@ -76,36 +71,31 @@ class FixedPoint(RandomWalk):
    def _compute_gm_imap_unordered(self):
-        self._check_edge_weight(self._graphs)
+        self._check_edge_weight(self._graphs, self._verbose)
        self._check_graphs(self._graphs)
+        if self._verbose >= 2:
+            import warnings
+            warnings.warn('All labels are ignored.')
-        # compute Gram matrix.
+        # Compute Gram matrix.
        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
-        if self._q == None:
-            # don't normalize adjacency matrices if q is a uniform vector. Note
-            # A_wave_list actually contains the transposes of the adjacency matrices.
-            if self._verbose >= 2:
-                iterator = tqdm(self._graphs, desc='compute adjacency matrices', file=sys.stdout)
-            else:
-                iterator = self._graphs
-            A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator]  # @todo: parallel?
-            if self._p == None:  # p is uniform distribution as default.
-                def init_worker(A_wave_list_toshare):
-                    global G_A_wave_list
-                    G_A_wave_list = A_wave_list_toshare
-                do_fun = self._wrapper_kernel_do
-                parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
-                    glbv=(A_wave_list,), n_jobs=self._n_jobs, verbose=self._verbose)
-            else:  # @todo
-                pass
+        # @todo: parallel this.
+        # Reindex nodes using consecutive integers for the convenience of kernel computation.
+        if self._verbose >= 2:
+            iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout)
+        else:
+            iterator = self._graphs
+        self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
+        if self._p is None and self._q is None:  # p and q are uniform distributions as default.
+            def init_worker(gn_toshare):
+                global G_gn
+                G_gn = gn_toshare
+            do_fun = self._wrapper_kernel_do
+            parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
+                glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
        else:  # @todo
            pass
@@ -113,39 +103,33 @@ class FixedPoint(RandomWalk):
    def _compute_kernel_list_series(self, g1, g_list):
-        self._check_edge_weight(g_list + [g1])
+        self._check_edge_weight(g_list + [g1], self._verbose)
        self._check_graphs(g_list + [g1])
+        if self._verbose >= 2:
+            import warnings
+            warnings.warn('All labels are ignored.')
        lmda = self._weight
        # compute kernel list.
        kernel_list = [None] * len(g_list)
-        if self._q == None:
-            # don't normalize adjacency matrices if q is a uniform vector. Note
-            # A_wave_list actually contains the transposes of the adjacency matrices.
-            A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
-            if self._verbose >= 2:
-                iterator = tqdm(range(len(g_list)), desc='compute adjacency matrices', file=sys.stdout)
-            else:
-                iterator = range(len(g_list))
-            A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator]
-            if self._p == None:  # p is uniform distribution as default.
-                if self._verbose >= 2:
-                    iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
-                else:
-                    iterator = range(len(g_list))
-                for i in iterator:
-                    kernel = self.__kernel_do(A_wave_1, A_wave_list[i], lmda)
-                    kernel_list[i] = kernel
-            else:  # @todo
-                pass
+        # Reindex nodes using consecutive integers for the convenience of kernel computation.
+        g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
+        if self._verbose >= 2:
+            iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout)
+        else:
+            iterator = g_list
+        g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
+        if self._p is None and self._q is None:  # p and q are uniform distributions as default.
+            if self._verbose >= 2:
+                iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
+            else:
+                iterator = range(len(g_list))
+            for i in iterator:
+                kernel = self.__kernel_do(g1, g_list[i], lmda)
+                kernel_list[i] = kernel
        else:  # @todo
            pass
@@ -153,43 +137,38 @@ class FixedPoint(RandomWalk):
    def _compute_kernel_list_imap_unordered(self, g1, g_list):
-        self._check_edge_weight(g_list + [g1])
+        self._check_edge_weight(g_list + [g1], self._verbose)
        self._check_graphs(g_list + [g1])
+        if self._verbose >= 2:
+            import warnings
+            warnings.warn('All labels are ignored.')
        # compute kernel list.
        kernel_list = [None] * len(g_list)
-        if self._q == None:
-            # don't normalize adjacency matrices if q is a uniform vector. Note
-            # A_wave_list actually contains the transposes of the adjacency matrices.
-            A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
-            if self._verbose >= 2:
-                iterator = tqdm(range(len(g_list)), desc='compute adjacency matrices', file=sys.stdout)
-            else:
-                iterator = range(len(g_list))
-            A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator]  # @todo: parallel?
+        # Reindex nodes using consecutive integers for the convenience of kernel computation.
+        g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
+        # @todo: parallel this.
+        if self._verbose >= 2:
+            iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout)
+        else:
+            iterator = g_list
+        g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
+        if self._p is None and self._q is None:  # p and q are uniform distributions as default.
-            if self._p == None:  # p is uniform distribution as default.
-                def init_worker(A_wave_1_toshare, A_wave_list_toshare):
-                    global G_A_wave_1, G_A_wave_list
-                    G_A_wave_1 = A_wave_1_toshare
-                    G_A_wave_list = A_wave_list_toshare
+            def init_worker(g1_toshare, g_list_toshare):
+                global G_g1, G_g_list
+                G_g1 = g1_toshare
+                G_g_list = g_list_toshare
-                do_fun = self._wrapper_kernel_list_do
-                def func_assign(result, var_to_assign):
-                    var_to_assign[result[0]] = result[1]
-                itr = range(len(g_list))
-                len_itr = len(g_list)
-                parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
-                    init_worker=init_worker, glbv=(A_wave_1, A_wave_list), method='imap_unordered',
-                    n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
+            do_fun = self._wrapper_kernel_list_do
+            def func_assign(result, var_to_assign):
+                var_to_assign[result[0]] = result[1]
+            itr = range(len(g_list))
+            len_itr = len(g_list)
+            parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
+                init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
+                n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
-            else:  # @todo
-                pass
        else:  # @todo
            pass
@@ -197,49 +176,146 @@ class FixedPoint(RandomWalk):




    def _wrapper_kernel_list_do(self, itr):
-        return itr, self._kernel_do(G_A_wave_1, G_A_wave_list[itr], self._weight)
+        return itr, self._kernel_do(G_g1, G_g_list[itr], self._weight)

    def _compute_single_kernel_series(self, g1, g2):
-        self._check_edge_weight([g1] + [g2])
+        self._check_edge_weight([g1] + [g2], self._verbose)
        self._check_graphs([g1] + [g2])
+        if self._verbose >= 2:
+            import warnings
+            warnings.warn('All labels are ignored.')
        lmda = self._weight
-        if self._q == None:
-            # don't normalize adjacency matrices if q is a uniform vector. Note
-            # A_wave_list actually contains the transposes of the adjacency matrices.
-            A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
-            A_wave_2 = nx.adjacency_matrix(g2, self._edge_weight).todense().transpose()
-            if self._p == None:  # p is uniform distribution as default.
-                kernel = self.__kernel_do(A_wave_1, A_wave_2, lmda)
-            else:  # @todo
-                pass
+        # Reindex nodes using consecutive integers for the convenience of kernel computation.
+        g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
+        g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal')
+        if self._p is None and self._q is None:  # p and q are uniform distributions as default.
+            kernel = self.__kernel_do(g1, g2, lmda)
        else:  # @todo
            pass
        return kernel

-    def __kernel_do(self, A_wave1, A_wave2, lmda):
-        S = lmda * A_wave2
-        T_t = A_wave1
+    def __kernel_do(self, g1, g2, lmda):
+        # First, compute kernels between all pairs of nodes using the method borrowed
+        # from FCSP. It is faster than directly computing all edge kernels
+        # when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the
+        # graphs compared, which is the most common case we encountered. For very
+        # sparse graphs, this would be slow.
+        vk_dict = self._compute_vertex_kernels(g1, g2)
+        # Compute the weight matrix of the direct product graph.
+        w_times, w_dim = self._compute_weight_matrix(g1, g2, vk_dict)
        # use uniform distribution if there is no prior knowledge.
-        nb_pd = len(A_wave1) * len(A_wave2)
-        p_times_uni = 1 / nb_pd
-        M0 = np.full((len(A_wave2), len(A_wave1)), p_times_uni)
-        X = dlyap(S, T_t, M0)
-        X = np.reshape(X, (-1, 1), order='F')
+        p_times_uni = 1 / w_dim
+        p_times = np.full((w_dim, 1), p_times_uni)
+        x = optimize.fixed_point(self._func_fp, p_times, args=(p_times, lmda, w_times), xtol=1e-06, maxiter=1000)
        # use uniform distribution if there is no prior knowledge.
-        q_times = np.full((1, nb_pd), p_times_uni)
-        return np.dot(q_times, X)
+        q_times = np.full((1, w_dim), p_times_uni)
+        return np.dot(q_times, x)

    def _wrapper_kernel_do(self, itr):
        i = itr[0]
        j = itr[1]
-        return i, j, self.__kernel_do(G_A_wave_list[i], G_A_wave_list[j], self._weight)
+        return i, j, self.__kernel_do(G_gn[i], G_gn[j], self._weight)

+    def _func_fp(self, x, p_times, lmda, w_times):
+        # One fixed-point step: x <- p + lmda * W x.
+        return p_times + lmda * np.dot(w_times, x)

+    def _compute_vertex_kernels(self, g1, g2):
+        """Compute vertex kernels between vertices of two graphs.
+        """
+        return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)

+    # @todo: move if out to make it faster.
+    # @todo: node/edge kernels use direct function rather than dicts.
+    def _compute_weight_matrix(self, g1, g2, vk_dict):
+        """Compute the weight matrix of the direct product graph.
+        """
+        # Define edge kernels.
+        def compute_ek_11(e1, e2, ke):
+            e1_labels = [e1[2][el] for el in self._edge_labels]
+            e2_labels = [e2[2][el] for el in self._edge_labels]
+            e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
+            e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
+            return ke(e1_labels, e2_labels, e1_attrs, e2_attrs)

+        def compute_ek_10(e1, e2, ke):
+            e1_labels = [e1[2][el] for el in self._edge_labels]
+            e2_labels = [e2[2][el] for el in self._edge_labels]
+            return ke(e1_labels, e2_labels)

+        def compute_ek_01(e1, e2, ke):
+            e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
+            e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
+            return ke(e1_attrs, e2_attrs)

+        def compute_ek_00(e1, e2, ke):
+            return 1

+        # Select the proper edge kernel.
+        if len(self._edge_labels) > 0:
+            # edge symb and non-symb labeled
+            if len(self._edge_attrs) > 0:
+                ke = self._edge_kernels['mix']
+                ek_temp = compute_ek_11
+            # edge symb labeled
+            else:
+                ke = self._edge_kernels['symb']
+                ek_temp = compute_ek_10
+        else:
+            # edge non-symb labeled
+            if len(self._edge_attrs) > 0:
+                ke = self._edge_kernels['nsymb']
+                ek_temp = compute_ek_01
+            # edge unlabeled
+            else:
+                ke = None
+                ek_temp = compute_ek_00  # @todo: check how much slower this is.

+        # Compute the weight matrix.
+        w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
+        w_times = np.zeros((w_dim, w_dim))
+        if vk_dict:  # node labeled
+            if self._ds_infos['directed']:
+                for e1 in g1.edges(data=True):
+                    for e2 in g2.edges(data=True):
+                        w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
+                        w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])]
+            else:  # undirected
+                for e1 in g1.edges(data=True):
+                    for e2 in g2.edges(data=True):
+                        w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
+                        w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])] + vk_dict[(e1[0], e2[1])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[0])]
+                        w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
+                        w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0])
+                        w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
+                        w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
+        else:  # node unlabeled
+            if self._ds_infos['directed']:
+                for e1 in g1.edges(data=True):
+                    for e2 in g2.edges(data=True):
+                        w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
+                        w_times[w_idx] = ek_temp(e1, e2, ke)
+            else:  # undirected
+                for e1 in g1.edges(data=True):
+                    for e2 in g2.edges(data=True):
+                        w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
+                        w_times[w_idx] = ek_temp(e1, e2, ke)
+                        w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
+                        w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0])
+                        w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
+                        w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]

+        return w_times, w_dim
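The new __kernel_do above replaces the dlyap solve with scipy.optimize.fixed_point. A minimal standalone sketch of that iteration follows (toy numbers and a random stand-in W of my own, not the commit's code):

# Toy sketch: solve x = p + lmda * W x with scipy.optimize.fixed_point.
import numpy as np
from scipy import optimize

rng = np.random.default_rng(0)
w_dim = 6
w_times = rng.random((w_dim, w_dim))       # stand-in weight matrix (assumed)
lmda = 0.01                                # small weight ensures convergence
p_times = np.full((w_dim, 1), 1 / w_dim)   # uniform start distribution

def func_fp(x, p_times, lmda, w_times):
    # One fixed-point step: x <- p + lmda * W x.
    return p_times + lmda * np.dot(w_times, x)

x = optimize.fixed_point(func_fp, p_times, args=(p_times, lmda, w_times),
                         xtol=1e-6, maxiter=1000)
q_times = np.full((1, w_dim), 1 / w_dim)   # uniform stop distribution
print(float(np.dot(q_times, x)))           # the kernel value q^T x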

+ 2
- 2
gklearn/kernels/graph_kernel.py View File

@@ -104,7 +104,7 @@ class GraphKernel(object):
        if self._parallel == 'imap_unordered':
            gram_matrix = self._compute_gm_imap_unordered()
-        elif self._parallel == None:
+        elif self._parallel is None:
            gram_matrix = self._compute_gm_series()
        else:
            raise Exception('Parallel mode is not set correctly.')
@@ -130,7 +130,7 @@ class GraphKernel(object):
        if self._parallel == 'imap_unordered':
            kernel_list = self._compute_kernel_list_imap_unordered(g1, g_list)
-        elif self._parallel == None:
+        elif self._parallel is None:
            kernel_list = self._compute_kernel_list_series(g1, g_list)
        else:
            raise Exception('Parallel mode is not set correctly.')
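The `== None` to `is None` changes throughout this commit are not only stylistic: several parameters in these kernels (such as the p and q vectors) may be numpy arrays, where `== None` is an elementwise comparison rather than a truth test. A quick illustration (toy code, not from the library):

import numpy as np

q = None
print(q is None)        # True

q = np.zeros(3)
print(q is None)        # False, as intended
try:
    if q == None:       # elementwise comparison -> ambiguous truth value
        pass
except ValueError as err:
    print(err)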


+ 11
- 11
gklearn/kernels/marginalized.py View File

@@ -59,7 +59,7 @@ class Marginalized(GraphKernel):
        from itertools import combinations_with_replacement
        itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
        if self._verbose >= 2:
-            iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
+            iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
        else:
            iterator = itr
        for i, j in iterator:
@@ -119,7 +119,7 @@ class Marginalized(GraphKernel):
        # compute kernel list.
        kernel_list = [None] * len(g_list)
        if self._verbose >= 2:
-            iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
+            iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
        else:
            iterator = range(len(g_list))
        for i in iterator:
@@ -165,7 +165,7 @@ class Marginalized(GraphKernel):
        len_itr = len(g_list)
        parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
            init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
-            n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
+            n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
        return kernel_list
@@ -184,12 +184,12 @@ class Marginalized(GraphKernel):
    def __kernel_do(self, g1, g2):
-        """Calculate marginalized graph kernel between 2 graphs.
+        """Compute marginalized graph kernel between 2 graphs.

        Parameters
        ----------
        g1, g2 : NetworkX graphs
-            2 graphs between which the kernel is calculated.
+            2 graphs between which the kernel is computed.

        Return
        ------
@@ -212,12 +212,12 @@ class Marginalized(GraphKernel):
#        # matrix to save all the R_inf for all pairs of nodes
#        R_inf = np.zeros([num_nodes_G1, num_nodes_G2])
#
-#        # calculate R_inf with a simple interative method
+#        # Compute R_inf with a simple iterative method
#        for i in range(1, n_iteration):
#            R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
#            R_inf_new.fill(r1)
#
-#            # calculate R_inf for each pair of nodes
+#            # Compute R_inf for each pair of nodes
#            for node1 in g1.nodes(data=True):
#                neighbor_n1 = g1[node1[0]]
#                # the transition probability distribution in the random walks
@@ -243,7 +243,7 @@ class Marginalized(GraphKernel):
#                            neighbor2]  # ref [1] equation (8)
#            R_inf[:] = R_inf_new
#
-#        # add elements of R_inf up and calculate kernel
+#        # add elements of R_inf up and compute kernel
#        for node1 in g1.nodes(data=True):
#            for node2 in g2.nodes(data=True):
#                s = p_init_G1 * p_init_G2 * deltakernel(
@@ -288,11 +288,11 @@ class Marginalized(GraphKernel):
                    deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \
                    deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels))

-        # calculate R_inf with a simple interative method
+        # Compute R_inf with a simple iterative method
        for i in range(2, self.__n_iteration + 1):
            R_inf_old = R_inf.copy()

-            # calculate R_inf for each pair of nodes
+            # Compute R_inf for each pair of nodes
            for node1 in g1.nodes():
                neighbor_n1 = g1[node1]
                # the transition probability distribution in the random walks
@@ -309,7 +309,7 @@ class Marginalized(GraphKernel):
                            (t_dict[(node1, node2, neighbor1, neighbor2)] * \
                            R_inf_old[(neighbor1, neighbor2)])  # ref [1] equation (8)

-        # add elements of R_inf up and calculate kernel
+        # add elements of R_inf up and compute kernel.
        for (n1, n2), value in R_inf.items():
            s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels))
            kernel += s * value  # ref [1] equation (6)
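To show what the R_inf iteration of equations (8) and (6) actually does, here is a heavily simplified toy re-derivation of my own (uniform start, stop, and transition probabilities; node-label delta kernel only; no edge labels), not gklearn code:

import networkx as nx

def toy_marginalized_kernel(g1, g2, p_quit=0.3, n_iteration=10):
    delta = lambda a, b: 1.0 if a == b else 0.0
    p1, p2 = 1 / g1.number_of_nodes(), 1 / g2.number_of_nodes()
    # R_inf[(u, v)] starts at the stopping probability.
    R = {(u, v): p_quit for u in g1.nodes for v in g2.nodes}
    for _ in range(2, n_iteration + 1):
        R_old = R.copy()
        for u in g1.nodes:
            for v in g2.nodes:
                s = 0.0
                for nu in g1[u]:
                    for nv in g2[v]:
                        t = ((1 - p_quit) / len(g1[u])) * ((1 - p_quit) / len(g2[v])) \
                            * delta(g1.nodes[nu]['atom'], g2.nodes[nv]['atom'])
                        s += t * R_old[(nu, nv)]    # ref [1] equation (8)
                R[(u, v)] = p_quit + s
    # ref [1] equation (6): weight by start probabilities and label match.
    return sum(p1 * p2 * delta(g1.nodes[u]['atom'], g2.nodes[v]['atom']) * r
               for (u, v), r in R.items())

g1 = nx.path_graph(3); nx.set_node_attributes(g1, 'C', 'atom')
g2 = nx.cycle_graph(4); nx.set_node_attributes(g2, 'C', 'atom')
print(toy_marginalized_kernel(g1, g2))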


+ 16
- 16
gklearn/kernels/marginalizedKernel.py View File

@@ -39,15 +39,15 @@ def marginalizedkernel(*args,
        n_jobs=None,
        chunksize=None,
        verbose=True):
-    """Calculate marginalized graph kernels between graphs.
+    """Compute marginalized graph kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
-        List of graphs between which the kernels are calculated.
+        List of graphs between which the kernels are computed.
    G1, G2 : NetworkX graphs
-        Two graphs between which the kernel is calculated.
+        Two graphs between which the kernel is computed.

    node_label : string
        Node attribute used as symbolic label. The default node label is 'atom'.
@@ -59,7 +59,7 @@ def marginalizedkernel(*args,
        The termination probability in the random walks generating step.

    n_iteration : integer
-        Time of iterations to calculate R_inf.
+        Number of iterations used to compute R_inf.

    remove_totters : boolean
        Whether to remove totterings by method introduced in [2]. The default
@@ -83,11 +83,11 @@ def marginalizedkernel(*args,
        Gn,
        attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
        node_label=node_label, edge_label=edge_label)
-    if not ds_attrs['node_labeled'] or node_label == None:
+    if not ds_attrs['node_labeled'] or node_label is None:
        node_label = 'atom'
        for G in Gn:
            nx.set_node_attributes(G, '0', 'atom')
-    if not ds_attrs['edge_labeled'] or edge_label == None:
+    if not ds_attrs['edge_labeled'] or edge_label is None:
        edge_label = 'bond_type'
        for G in Gn:
            nx.set_edge_attributes(G, '0', 'bond_type')
@@ -133,7 +133,7 @@ def marginalizedkernel(*args,
#    # ---- direct running, normally use single CPU core. ----
##    pbar = tqdm(
##        total=(1 + len(Gn)) * len(Gn) / 2,
-##        desc='calculating kernels',
+##        desc='Computing kernels',
##        file=sys.stdout)
#    for i in range(0, len(Gn)):
#        for j in range(i, len(Gn)):
@@ -152,12 +152,12 @@ def marginalizedkernel(*args,




def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):
-    """Calculate marginalized graph kernel between 2 graphs.
+    """Compute marginalized graph kernel between 2 graphs.

    Parameters
    ----------
    G1, G2 : NetworkX graphs
-        2 graphs between which the kernel is calculated.
+        2 graphs between which the kernel is computed.
    node_label : string
        node attribute used as label.
    edge_label : string
@@ -165,7 +165,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):
    p_quit : integer
        the termination probability in the random walks generating step.
    n_iteration : integer
-        time of iterations to calculate R_inf.
+        number of iterations used to compute R_inf.

    Return
    ------
@@ -188,12 +188,12 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):
#    # matrix to save all the R_inf for all pairs of nodes
#    R_inf = np.zeros([num_nodes_G1, num_nodes_G2])
#
-#    # calculate R_inf with a simple interative method
+#    # Compute R_inf with a simple iterative method
#    for i in range(1, n_iteration):
#        R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
#        R_inf_new.fill(r1)
#
-#        # calculate R_inf for each pair of nodes
+#        # Compute R_inf for each pair of nodes
#        for node1 in g1.nodes(data=True):
#            neighbor_n1 = g1[node1[0]]
#            # the transition probability distribution in the random walks
@@ -219,7 +219,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):
#                        neighbor2]  # ref [1] equation (8)
#        R_inf[:] = R_inf_new
#
-#    # add elements of R_inf up and calculate kernel
+#    # add elements of R_inf up and compute kernel.
#    for node1 in g1.nodes(data=True):
#        for node2 in g2.nodes(data=True):
#            s = p_init_G1 * p_init_G2 * deltakernel(
@@ -267,11 +267,11 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):
                        neighbor_n1[neighbor1][edge_label],
                        neighbor_n2[neighbor2][edge_label])

-    # calculate R_inf with a simple interative method
+    # Compute R_inf with a simple iterative method
    for i in range(2, n_iteration + 1):
        R_inf_old = R_inf.copy()

-        # calculate R_inf for each pair of nodes
+        # Compute R_inf for each pair of nodes
        for node1 in g1.nodes():
            neighbor_n1 = g1[node1]
            # the transition probability distribution in the random walks
@@ -288,7 +288,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):
                        (t_dict[(node1, node2, neighbor1, neighbor2)] * \
                        R_inf_old[(neighbor1, neighbor2)])  # ref [1] equation (8)

-    # add elements of R_inf up and calculate kernel
+    # add elements of R_inf up and compute kernel.
    for (n1, n2), value in R_inf.items():
        s = p_init_G1 * p_init_G2 * deltakernel(
            g1.nodes[n1][node_label], g2.nodes[n2][node_label])
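A usage sketch based only on the signature visible in this diff; the import path is inferred from the file path shown here, and the assumption that the function returns the Gram matrix plus a run time, like other gklearn kernel helpers, is mine:

import networkx as nx
from gklearn.kernels.marginalizedKernel import marginalizedkernel

g1, g2 = nx.path_graph(4), nx.cycle_graph(4)
for g in (g1, g2):
    nx.set_node_attributes(g, '0', 'atom')        # dummy symbolic labels
    nx.set_edge_attributes(g, '0', 'bond_type')

km, run_time = marginalizedkernel([g1, g2],
                                  node_label='atom', edge_label='bond_type',
                                  p_quit=0.5, n_iteration=10,
                                  remove_totters=False, n_jobs=1)
print(km)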


+ 8
- 8
gklearn/kernels/path_up_to_h.py View File

@@ -24,7 +24,7 @@ from gklearn.kernels import GraphKernel
from gklearn.utils import Trie


-class PathUpToH(GraphKernel):  # @todo: add function for k_func == None
+class PathUpToH(GraphKernel):  # @todo: add function for k_func is None

    def __init__(self, **kwargs):
        GraphKernel.__init__(self)
@@ -43,7 +43,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
        itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2)
        if self._verbose >= 2:
            iterator_ps = tqdm(range(0, len(self._graphs)), desc='getting paths', file=sys.stdout)
-            iterator_kernel = tqdm(itr_kernel, desc='calculating kernels', file=sys.stdout)
+            iterator_kernel = tqdm(itr_kernel, desc='Computing kernels', file=sys.stdout)
        else:
            iterator_ps = range(0, len(self._graphs))
            iterator_kernel = itr_kernel
@@ -69,7 +69,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
    def _compute_gm_imap_unordered(self):
        self.__add_dummy_labels(self._graphs)
-        # get all paths of all graphs before calculating kernels to save time,
+        # get all paths of all graphs before computing kernels to save time,
        # but this may cost a lot of memory for large datasets.
        pool = Pool(self._n_jobs)
        itr = zip(self._graphs, range(0, len(self._graphs)))
@@ -123,7 +123,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
        if self._verbose >= 2:
            iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout)
-            iterator_kernel = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
+            iterator_kernel = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
        else:
            iterator_ps = g_list
            iterator_kernel = range(len(g_list))
@@ -149,7 +149,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        self.__add_dummy_labels(g_list + [g1])
-        # get all paths of all graphs before calculating kernels to save time,
+        # get all paths of all graphs before computing kernels to save time,
        # but this may cost a lot of memory for large datasets.
        pool = Pool(self._n_jobs)
        itr = zip(g_list, range(0, len(g_list)))
@@ -190,7 +190,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
        itr = range(len(g_list))
        len_itr = len(g_list)
        parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
-            init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
+            init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
        return kernel_list
@@ -218,7 +218,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None


    def __kernel_do_trie(self, trie1, trie2):
-        """Calculate path graph kernels up to depth d between 2 graphs using trie.
+        """Compute path graph kernels up to depth d between 2 graphs using trie.

        Parameters
        ----------
@@ -335,7 +335,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
    def __kernel_do_naive(self, paths1, paths2):
-        """Calculate path graph kernels up to depth d between 2 graphs naively.
+        """Compute path graph kernels up to depth d between 2 graphs naively.

        Parameters
        ----------
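For intuition about what this kernel precomputes before comparing graphs, here is a toy DFS enumeration of all vertex paths of length at most h (my own illustration; gklearn itself stores these in tries or path lists):

import networkx as nx

def paths_up_to_h(g, h):
    paths = [[v] for v in g.nodes]          # length-0 paths
    frontier = [[v] for v in g.nodes]
    for _ in range(h):
        new_frontier = []
        for path in frontier:
            for nb in g[path[-1]]:
                if nb not in path:          # simple paths only
                    new_frontier.append(path + [nb])
        paths += new_frontier
        frontier = new_frontier
    return paths

print(len(paths_up_to_h(nx.cycle_graph(4), 2)))   # 4 + 8 + 8 = 20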


+ 43
- 43
gklearn/kernels/randomWalkKernel.py View File

@@ -37,15 +37,15 @@ def randomwalkkernel(*args,
        n_jobs=None,
        chunksize=None,
        verbose=True):
-    """Calculate random walk graph kernels.
+    """Compute random walk graph kernels.

    Parameters
    ----------
    Gn : List of NetworkX graph
-        List of graphs between which the kernels are calculated.
+        List of graphs between which the kernels are computed.
    G1, G2 : NetworkX graphs
-        Two graphs between which the kernel is calculated.
+        Two graphs between which the kernel is computed.

    compute_method : string
        Method used to compute kernel. The following choices are
@@ -125,7 +125,7 @@ def randomwalkkernel(*args,
    Gn = [g.copy() for g in Gn]

    eweight = None
-    if edge_weight == None:
+    if edge_weight is None:
        if verbose:
            print('\n None edge weight specified. Set all weight to 1.\n')
    else:
@@ -212,12 +212,12 @@ def randomwalkkernel(*args,


############################################################################### ###############################################################################
def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True): def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True):
"""Calculate walk graph kernels up to n between 2 graphs using Sylvester method.
"""Compute walk graph kernels up to n between 2 graphs using Sylvester method.


Parameters Parameters
---------- ----------
G1, G2 : NetworkX graph G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
Graphs between which the kernel is computed.
node_label : string node_label : string
node attribute used as label. node attribute used as label.
edge_label : string edge_label : string
@@ -230,7 +230,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True
""" """
Kmatrix = np.zeros((len(Gn), len(Gn))) Kmatrix = np.zeros((len(Gn), len(Gn)))


if q == None:
if q is None:
# don't normalize adjacency matrices if q is a uniform vector. Note # don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices. # A_wave_list actually contains the transposes of the adjacency matrices.
A_wave_list = [ A_wave_list = [
@@ -245,7 +245,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True
#            norm = A_tilde.sum(axis=0)
#            norm[norm == 0] = 1
#            A_wave_list.append(A_tilde / norm)
-    if p == None:  # p is uniform distribution as default.
+    if p is None:  # p is uniform distribution as default.
        def init_worker(Awl_toshare):
            global G_Awl
            G_Awl = Awl_toshare
@@ -255,7 +255,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True
#        pbar = tqdm(
#            total=(1 + len(Gn)) * len(Gn) / 2,
-#            desc='calculating kernels',
+#            desc='Computing kernels',
#            file=sys.stdout)
#        for i in range(0, len(Gn)):
#            for j in range(i, len(Gn)):
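What the Sylvester-equation route computes can be checked naively with the vec trick, k = q^T (I - lmda*(A1 kron A2))^{-1} p; the dlyap solver's whole point is to avoid ever forming this Kronecker product. A toy numpy sanity check of my own (not the library's solver):

import numpy as np
import networkx as nx

g1, g2 = nx.path_graph(3), nx.cycle_graph(4)
A1 = nx.to_numpy_array(g1).T    # transposed adjacency, as A_wave_list holds
A2 = nx.to_numpy_array(g2).T
lmda = 0.05

n = A1.shape[0] * A2.shape[0]
p = np.full(n, 1 / n)           # uniform start distribution
q = np.full(n, 1 / n)           # uniform stop distribution
x = np.linalg.solve(np.eye(n) - lmda * np.kron(A1, A2), p)
print(float(q @ x))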
@@ -300,12 +300,12 @@ def _se_do(A_wave1, A_wave2, lmda):
###############################################################################
def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
                node_label, edge_label, eweight, n_jobs, chunksize, verbose=True):
-    """Calculate walk graph kernels up to n between 2 graphs using conjugate method.
+    """Compute walk graph kernels up to n between 2 graphs using the conjugate gradient method.

    Parameters
    ----------
    G1, G2 : NetworkX graph
-        Graphs between which the kernel is calculated.
+        Graphs between which the kernel is computed.
    node_label : string
        node attribute used as label.
    edge_label : string
@@ -321,14 +321,14 @@ def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
#    if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \
#        not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] < 1:
#        # this is faster from unlabeled graphs. @todo: why?
-#        if q == None:
+#        if q is None:
#            # don't normalize adjacency matrices if q is a uniform vector. Note
#            # A_wave_list actually contains the transposes of the adjacency matrices.
#            A_wave_list = [
#                nx.adjacency_matrix(G, eweight).todense().transpose() for G in
#                tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout)
#            ]
-#        if p == None:  # p is uniform distribution as default.
+#        if p is None:  # p is uniform distribution as default.
#            def init_worker(Awl_toshare):
#                global G_Awl
#                G_Awl = Awl_toshare
@@ -336,23 +336,23 @@ def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
#            parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
#                        glbv=(A_wave_list,), n_jobs=n_jobs)
#    else:
-    # reindex nodes using consecutive integers for convenience of kernel calculation.
+    # reindex nodes using consecutive integers for convenience of kernel computation.
    Gn = [nx.convert_node_labels_to_integers(
        g, first_label=0, label_attribute='label_orignal') for g in (tqdm(
        Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)]
-    if p == None and q == None:  # p and q are uniform distributions as default.
+    if p is None and q is None:  # p and q are uniform distributions as default.
        def init_worker(gn_toshare):
            global G_gn
            G_gn = gn_toshare
-        do_partial = partial(wrapper_cg_labled_do, ds_attrs, node_kernels,
+        do_partial = partial(wrapper_cg_labeled_do, ds_attrs, node_kernels,
                            node_label, edge_kernels, edge_label, lmda)
        parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
                    glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)
#        pbar = tqdm(
#            total=(1 + len(Gn)) * len(Gn) / 2,
-#            desc='calculating kernels',
+#            desc='Computing kernels',
#            file=sys.stdout)
#        for i in range(0, len(Gn)):
#            for j in range(i, len(Gn)):
@@ -382,24 +382,24 @@ def _cg_unlabled_do(A_wave1, A_wave2, lmda):
    return np.dot(q_times, x)


-def wrapper_cg_labled_do(ds_attrs, node_kernels, node_label, edge_kernels,
+def wrapper_cg_labeled_do(ds_attrs, node_kernels, node_label, edge_kernels,
                        edge_label, lmda, itr):
    i = itr[0]
    j = itr[1]
-    return i, j, _cg_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
+    return i, j, _cg_labeled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
        node_label, edge_kernels, edge_label, lmda)


-def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
+def _cg_labeled_do(g1, g2, ds_attrs, node_kernels, node_label,
                edge_kernels, edge_label, lmda):
-    # Frist, compute kernels between all pairs of nodes, method borrowed
+    # First, compute kernels between all pairs of nodes using the method borrowed
    # from FCSP. It is faster than directly computing all edge kernels
    # when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the
    # graphs compared, which is the most common case we encountered. For very
    # sparse graphs, this would be slow.
    vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label)
-    # Compute weight matrix of the direct product graph.
+    # Compute the weight matrix of the direct product graph.
    w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs,
                            edge_kernels, edge_label)
    # use uniform distribution if there is no prior knowledge.
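The FCSP idea behind computeVK, reduced to a minimal sketch of my own (a delta kernel on one assumed node label): kernel values are computed once per node pair and then reused for every product-graph edge, instead of being recomputed per edge pair.

import networkx as nx

def compute_vk_dict(g1, g2, node_label='atom'):
    delta = lambda a, b: 1.0 if a == b else 0.0
    # One lookup table for all node pairs, reused by the weight matrix.
    return {(n1, n2): delta(d1[node_label], d2[node_label])
            for n1, d1 in g1.nodes(data=True)
            for n2, d2 in g2.nodes(data=True)}

g1 = nx.path_graph(3); nx.set_node_attributes(g1, 'C', 'atom')
g2 = nx.path_graph(2); nx.set_node_attributes(g2, 'N', 'atom')
print(compute_vk_dict(g1, g2)[(0, 0)])   # 0.0: labels differ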
@@ -415,12 +415,12 @@ def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
###############################################################################
def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
                node_label, edge_label, eweight, n_jobs, chunksize, verbose=True):
-    """Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method.
+    """Compute walk graph kernels up to n between 2 graphs using Fixed-Point method.

    Parameters
    ----------
    G1, G2 : NetworkX graph
-        Graphs between which the kernel is calculated.
+        Graphs between which the kernel is computed.
    node_label : string
        node attribute used as label.
    edge_label : string
@@ -438,17 +438,17 @@ def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
#    if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \
#        not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] > 1:
#        # this is faster from unlabeled graphs. @todo: why?
-#        if q == None:
+#        if q is None:
#            # don't normalize adjacency matrices if q is a uniform vector. Note
#            # A_wave_list actually contains the transposes of the adjacency matrices.
#            A_wave_list = [
#                nx.adjacency_matrix(G, eweight).todense().transpose() for G in
#                tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout)
#            ]
-#        if p == None:  # p is uniform distribution as default.
+#        if p is None:  # p is uniform distribution as default.
#            pbar = tqdm(
#                total=(1 + len(Gn)) * len(Gn) / 2,
-#                desc='calculating kernels',
+#                desc='Computing kernels',
#                file=sys.stdout)
#            for i in range(0, len(Gn)):
#                for j in range(i, len(Gn)):
@@ -464,33 +464,33 @@ def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
#                    Kmatrix[j][i] = Kmatrix[i][j]
#                    pbar.update(1)
#    else:
-    # reindex nodes using consecutive integers for convenience of kernel calculation.
+    # reindex nodes using consecutive integers for the convenience of kernel computation.
    Gn = [nx.convert_node_labels_to_integers(
        g, first_label=0, label_attribute='label_orignal') for g in (tqdm(
        Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)]
-    if p == None and q == None:  # p and q are uniform distributions as default.
+    if p is None and q is None:  # p and q are uniform distributions as default.
        def init_worker(gn_toshare):
            global G_gn
            G_gn = gn_toshare
-        do_partial = partial(wrapper_fp_labled_do, ds_attrs, node_kernels,
+        do_partial = partial(wrapper_fp_labeled_do, ds_attrs, node_kernels,
                            node_label, edge_kernels, edge_label, lmda)
        parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
                    glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)
    return Kmatrix


-def wrapper_fp_labled_do(ds_attrs, node_kernels, node_label, edge_kernels,
+def wrapper_fp_labeled_do(ds_attrs, node_kernels, node_label, edge_kernels,
                        edge_label, lmda, itr):
    i = itr[0]
    j = itr[1]
-    return i, j, _fp_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
+    return i, j, _fp_labeled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
        node_label, edge_kernels, edge_label, lmda)


-def _fp_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
+def _fp_labeled_do(g1, g2, ds_attrs, node_kernels, node_label,
                edge_kernels, edge_label, lmda):
-    # Frist, compute kernels between all pairs of nodes, method borrowed
+    # First, compute kernels between all pairs of nodes using the method borrowed
    # from FCSP. It is faster than directly computing all edge kernels
    # when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the
    # graphs compared, which is the most common case we encountered. For very
@@ -519,13 +519,13 @@ def func_fp(x, p_times, lmda, w_times):


###############################################################################
def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunksize, verbose=True):
-    """Calculate walk graph kernels up to n between 2 unlabeled graphs using
+    """Compute walk graph kernels up to n between 2 unlabeled graphs using
    spectral decomposition method. Labels will be ignored.

    Parameters
    ----------
    G1, G2 : NetworkX graph
-        Graphs between which the kernel is calculated.
+        Graphs between which the kernel is computed.
    node_label : string
        node attribute used as label.
    edge_label : string
@@ -538,7 +538,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk
""" """
Kmatrix = np.zeros((len(Gn), len(Gn))) Kmatrix = np.zeros((len(Gn), len(Gn)))


if q == None:
if q is None:
# precompute the spectral decomposition of each graph. # precompute the spectral decomposition of each graph.
P_list = [] P_list = []
D_list = [] D_list = []
@@ -552,7 +552,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk
            P_list.append(ev)
#        P_inv_list = [p.T for p in P_list]  # @todo: also works for directed graphs?

-        if p == None:  # p is uniform distribution as default.
+        if p is None:  # p is uniform distribution as default.
            q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in Gn]
#            q_T_list = [q.T for q in q_list]
            def init_worker(q_T_toshare, P_toshare, D_toshare):
@@ -568,7 +568,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk
#            pbar = tqdm(
#                total=(1 + len(Gn)) * len(Gn) / 2,
-#                desc='calculating kernels',
+#                desc='Computing kernels',
#                file=sys.stdout)
#            for i in range(0, len(Gn)):
#                for j in range(i, len(Gn)):
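The spectral shortcut that this precomputation enables can be verified on toy data: for symmetric adjacency matrices A_i = P_i D_i P_i^T, the resolvent of the Kronecker product reduces to a scalar function of eigenvalue pairs. A numpy sketch of my own (geometric sub-kernel, uniform p = q assumed):

import numpy as np
import networkx as nx

g1, g2 = nx.path_graph(3), nx.cycle_graph(4)
A1, A2 = nx.to_numpy_array(g1), nx.to_numpy_array(g2)
weight = 0.05

D1, P1 = np.linalg.eigh(A1)
D2, P2 = np.linalg.eigh(A2)
q1 = np.full(len(A1), 1 / len(A1))
q2 = np.full(len(A2), 1 / len(A2))

# k = (q1 kron q2)^T (P1 kron P2) (I - w*D)^(-1) (P1 kron P2)^T (p1 kron p2)
flt = 1 / (1 - weight * np.outer(D1, D2).ravel())   # geometric sub-kernel
vec = np.kron(q1 @ P1, q2 @ P2)                     # (P1 kron P2)^T q
print(float(vec * flt @ vec))                       # uses p = q (uniform)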
@@ -605,12 +605,12 @@ def _sd_do(q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel):


###############################################################################
def _randomwalkkernel_kron(G1, G2, node_label, edge_label):
-    """Calculate walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method.
+    """Compute walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method.

    Parameters
    ----------
    G1, G2 : NetworkX graph
-        Graphs between which the kernel is calculated.
+        Graphs between which the kernel is computed.
    node_label : string
        node attribute used as label.
    edge_label : string
@@ -692,8 +692,8 @@ def computeVK(g1, g2, ds_attrs, node_kernels, node_label):




def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label):
-    '''Compute weight matrix of the direct product graph.
-    '''
+    """Compute the weight matrix of the direct product graph.
+    """
    w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
    w_times = np.zeros((w_dim, w_dim))
    if vk_dict:  # node labeled


+ 22
- 60
gklearn/kernels/random_walk.py View File

@@ -10,85 +10,47 @@ Created on Wed Aug 19 16:55:17 2020
[1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
"""

-import sys
-from tqdm import tqdm
-import numpy as np
-import networkx as nx
-from gklearn.utils import SpecialLabel
-from gklearn.utils.parallel import parallel_gm, parallel_me
-from gklearn.utils.utils import direct_product_graph
-from gklearn.kernels import GraphKernel
+from gklearn.kernels import SylvesterEquation, ConjugateGradient, FixedPoint, SpectralDecomposition


-class RandomWalk(GraphKernel):
+class RandomWalk(SylvesterEquation, ConjugateGradient, FixedPoint, SpectralDecomposition):

    def __init__(self, **kwargs):
-        GraphKernel.__init__(self)
        self._compute_method = kwargs.get('compute_method', None)
-        self._weight = kwargs.get('weight', 1)
-        self._p = kwargs.get('p', None)
-        self._q = kwargs.get('q', None)
-        self._edge_weight = kwargs.get('edge_weight', None)
-        self._ds_infos = kwargs.get('ds_infos', {})
        self._compute_method = self._compute_method.lower()
+        if self._compute_method == 'sylvester':
+            self._parent = SylvesterEquation
+        elif self._compute_method == 'conjugate':
+            self._parent = ConjugateGradient
+        elif self._compute_method == 'fp':
+            self._parent = FixedPoint
+        elif self._compute_method == 'spectral':
+            self._parent = SpectralDecomposition
+        elif self._compute_method == 'kron':
+            raise Exception('This computing method is not completed yet.')
+        else:
+            raise Exception('This computing method does not exist. The possible choices include: "sylvester", "conjugate", "fp", "spectral".')
+        self._parent.__init__(self, **kwargs)

    def _compute_gm_series(self):
-        pass
+        return self._parent._compute_gm_series(self)

    def _compute_gm_imap_unordered(self):
-        pass
+        return self._parent._compute_gm_imap_unordered(self)

    def _compute_kernel_list_series(self, g1, g_list):
-        pass
+        return self._parent._compute_kernel_list_series(self, g1, g_list)

    def _compute_kernel_list_imap_unordered(self, g1, g_list):
-        pass
+        return self._parent._compute_kernel_list_imap_unordered(self, g1, g_list)

    def _compute_single_kernel_series(self, g1, g2):
-        pass
+        return self._parent._compute_single_kernel_series(self, g1, g2)

-    def _check_graphs(self, Gn):
-        # remove graphs with no edges, as no walk can be found in their structures,
-        # so the weight matrix between such a graph and itself might be zero.
-        for g in Gn:
-            if nx.number_of_edges(g) == 0:
-                raise Exception('Graphs must contain edges to construct weight matrices.')

-    def _check_edge_weight(self, G0, verbose):
-        eweight = None
-        if self._edge_weight == None:
-            if verbose >= 2:
-                print('\n None edge weight is specified. Set all weight to 1.\n')
-        else:
-            try:
-                some_weight = list(nx.get_edge_attributes(G0, self._edge_weight).values())[0]
-                if isinstance(some_weight, float) or isinstance(some_weight, int):
-                    eweight = self._edge_weight
-                else:
-                    if verbose >= 2:
-                        print('\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' % self._edge_weight)
-            except:
-                if verbose >= 2:
-                    print('\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' % self._edge_weight)
-        self._edge_weight = eweight

-    def _add_dummy_labels(self, Gn):
-        if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
-            for i in range(len(Gn)):
-                nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__node_labels = [SpecialLabel.DUMMY]
-        if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
-            for i in range(len(Gn)):
-                nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__edge_labels = [SpecialLabel.DUMMY]
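The delegation pattern introduced here, reduced to a standalone sketch of my own: a facade class picks a strategy class at construction time and forwards every computation to it through an explicit unbound call, exactly as RandomWalk does with its four solver parents.

class StrategyA:
    def compute(self):
        return 'A result'

class StrategyB:
    def compute(self):
        return 'B result'

class Facade(StrategyA, StrategyB):
    def __init__(self, method='a'):
        self._parent = {'a': StrategyA, 'b': StrategyB}[method]
        self._parent.__init__(self)           # initialize the chosen parent

    def compute(self):
        return self._parent.compute(self)     # explicit unbound dispatch

print(Facade('b').compute())                  # 'B result'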

+ 86
- 0
gklearn/kernels/random_walk_meta.py View File

@@ -0,0 +1,86 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 19 16:55:17 2020

@author: ljia

@references:

[1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
"""

import networkx as nx
from gklearn.utils import SpecialLabel
from gklearn.kernels import GraphKernel


class RandomWalkMeta(GraphKernel):
def __init__(self, **kwargs):
GraphKernel.__init__(self)
self._weight = kwargs.get('weight', 1)
self._p = kwargs.get('p', None)
self._q = kwargs.get('q', None)
self._edge_weight = kwargs.get('edge_weight', None)
self._ds_infos = kwargs.get('ds_infos', {})
def _compute_gm_series(self):
pass


def _compute_gm_imap_unordered(self):
pass
def _compute_kernel_list_series(self, g1, g_list):
pass

def _compute_kernel_list_imap_unordered(self, g1, g_list):
pass
def _compute_single_kernel_series(self, g1, g2):
pass
def _check_graphs(self, Gn):
# remove graphs with no edges, as no walk can be found in their structures,
# so the weight matrix between such a graph and itself might be zero.
for g in Gn:
if nx.number_of_edges(g) == 0:
raise Exception('Graphs must contain edges to construct weight matrices.')
def _check_edge_weight(self, G0, verbose):
eweight = None
if self._edge_weight is None:
if verbose >= 2:
print('\n No edge weight is specified. Set all weights to 1.\n')
else:
try:
some_weight = list(nx.get_edge_attributes(G0, self._edge_weight).values())[0]
if isinstance(some_weight, float) or isinstance(some_weight, int):
eweight = self._edge_weight
else:
if verbose >= 2:
print('\n Edge weight with name "%s" is not float or integer. Set all weights to 1.\n' % self._edge_weight)
except:
if verbose >= 2:
print('\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' % self._edge_weight)
self._edge_weight = eweight
def _add_dummy_labels(self, Gn):
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__node_labels = [SpecialLabel.DUMMY]
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__edge_labels = [SpecialLabel.DUMMY]
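
For clarity, the _check_edge_weight logic above restated as a standalone sketch (resolve_edge_weight is our name, not part of the module):

import networkx as nx

def resolve_edge_weight(G, edge_weight, verbose=2):
    if edge_weight is None:
        if verbose >= 2:
            print('No edge weight is specified. Set all weights to 1.')
        return None
    try:
        some_weight = list(nx.get_edge_attributes(G, edge_weight).values())[0]
    except IndexError:
        if verbose >= 2:
            print('Edge weight "%s" is not found in the edge attributes. Set all weights to 1.' % edge_weight)
        return None
    if isinstance(some_weight, (int, float)):
        return edge_weight
    if verbose >= 2:
        print('Edge weight "%s" is not float or integer. Set all weights to 1.' % edge_weight)
    return None

G = nx.Graph()
G.add_edge(0, 1, bond=1.5)
assert resolve_edge_weight(G, 'bond') == 'bond'
assert resolve_edge_weight(G, 'missing') is None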

+ 3
- 3
gklearn/kernels/shortest_path.py

@@ -47,7 +47,7 @@ class ShortestPath(GraphKernel):
from itertools import combinations_with_replacement from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else: else:
iterator = itr iterator = itr
for i, j in iterator: for i, j in iterator:
@@ -102,7 +102,7 @@ class ShortestPath(GraphKernel):
# compute kernel list. # compute kernel list.
kernel_list = [None] * len(g_list) kernel_list = [None] * len(g_list)
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else: else:
iterator = range(len(g_list)) iterator = range(len(g_list))
for i in iterator: for i in iterator:
@@ -145,7 +145,7 @@ class ShortestPath(GraphKernel):
itr = range(len(g_list)) itr = range(len(g_list))
len_itr = len(g_list) len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
return kernel_list return kernel_list
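
The renamed progress bars above all wrap the same symmetric Gram-matrix loop; a self-contained sketch of that pattern (graphs and pairwise_kernel are toy stand-ins for the real data and kernel):

import sys
import numpy as np
from itertools import combinations_with_replacement
from tqdm import tqdm

graphs = [object()] * 4
pairwise_kernel = lambda gi, gj: 1.0  # stand-in for the shortest-path kernel

gram_matrix = np.zeros((len(graphs), len(graphs)))
itr = combinations_with_replacement(range(len(graphs)), 2)
for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout):
    kernel = pairwise_kernel(graphs[i], graphs[j])
    gram_matrix[i][j] = kernel
    gram_matrix[j][i] = kernel  # symmetry: only the upper triangle is computed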


+ 5
- 5
gklearn/kernels/spKernel.py

@@ -29,15 +29,15 @@ def spkernel(*args,
n_jobs=None, n_jobs=None,
chunksize=None, chunksize=None,
verbose=True): verbose=True):
"""Calculate shortest-path kernels between graphs.
"""Compute shortest-path kernels between graphs.


Parameters Parameters
---------- ----------
Gn : List of NetworkX graph Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
G1, G2 : NetworkX graphs G1, G2 : NetworkX graphs
Two graphs between which the kernel is calculated.
Two graphs between which the kernel is computed.


node_label : string node_label : string
Node attribute used as label. The default node label is atom. Node attribute used as label. The default node label is atom.
@@ -179,7 +179,7 @@ def spkernel(*args,
# do_partial = partial(spkernel_do, Gn, ds_attrs, node_label, node_kernels) # do_partial = partial(spkernel_do, Gn, ds_attrs, node_label, node_kernels)
# itr = combinations_with_replacement(range(0, len(Gn)), 2) # itr = combinations_with_replacement(range(0, len(Gn)), 2)
# for i, j, kernel in tqdm( # for i, j, kernel in tqdm(
# pool.map(do_partial, itr), desc='calculating kernels',
# pool.map(do_partial, itr), desc='Computing kernels',
# file=sys.stdout): # file=sys.stdout):
# Kmatrix[i][j] = kernel # Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel # Kmatrix[j][i] = kernel
@@ -202,7 +202,7 @@ def spkernel(*args,
# # ---- direct running, normally use single CPU core. ---- # # ---- direct running, normally use single CPU core. ----
# from itertools import combinations_with_replacement # from itertools import combinations_with_replacement
# itr = combinations_with_replacement(range(0, len(Gn)), 2) # itr = combinations_with_replacement(range(0, len(Gn)), 2)
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout):
# kernel = spkernel_do(Gn[i], Gn[j], ds_attrs, node_label, node_kernels) # kernel = spkernel_do(Gn[i], Gn[j], ds_attrs, node_label, node_kernels)
# Kmatrix[i][j] = kernel # Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel # Kmatrix[j][i] = kernel
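
The commented-out blocks above refer to the pool.map / pool.imap_unordered pattern these modules use for parallel Gram matrices: workers yield (i, j, kernel) triples that fill both symmetric entries. A runnable sketch with toy stand-ins (_toy_kernel and Gn are ours):

import sys
from functools import partial
from itertools import combinations_with_replacement
from multiprocessing import Pool
import numpy as np
from tqdm import tqdm

def _toy_kernel(Gn, ij):
    i, j = ij
    return i, j, float(len(Gn[i]) == len(Gn[j]))  # stand-in kernel

if __name__ == '__main__':
    Gn = [[0] * n for n in (2, 3, 3)]
    Kmatrix = np.zeros((len(Gn), len(Gn)))
    itr = list(combinations_with_replacement(range(len(Gn)), 2))
    do_partial = partial(_toy_kernel, Gn)
    with Pool(2) as pool:
        for i, j, kernel in tqdm(pool.imap_unordered(do_partial, itr),
                                 desc='Computing kernels', file=sys.stdout):
            Kmatrix[i][j] = kernel
            Kmatrix[j][i] = kernel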


+ 23
- 23
gklearn/kernels/spectral_decomposition.py

@@ -16,19 +16,19 @@ import numpy as np
import networkx as nx import networkx as nx
from scipy.sparse import kron from scipy.sparse import kron
from gklearn.utils.parallel import parallel_gm, parallel_me from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.kernels import RandomWalk
from gklearn.kernels import RandomWalkMeta




class SpectralDecomposition(RandomWalk):
class SpectralDecomposition(RandomWalkMeta):
def __init__(self, **kwargs): def __init__(self, **kwargs):
RandomWalk.__init__(self, **kwargs)
super().__init__(**kwargs)
self._sub_kernel = kwargs.get('sub_kernel', None) self._sub_kernel = kwargs.get('sub_kernel', None)


def _compute_gm_series(self): def _compute_gm_series(self):
self._check_edge_weight(self._graphs)
self._check_edge_weight(self._graphs, self._verbose)
self._check_graphs(self._graphs) self._check_graphs(self._graphs)
if self._verbose >= 2: if self._verbose >= 2:
import warnings import warnings
@@ -37,7 +37,7 @@ class SpectralDecomposition(RandomWalk):
# compute Gram matrix. # compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
if self._q == None:
if self._q is None:
# precompute the spectral decomposition of each graph. # precompute the spectral decomposition of each graph.
P_list = [] P_list = []
D_list = [] D_list = []
@@ -54,14 +54,14 @@ class SpectralDecomposition(RandomWalk):
P_list.append(ev) P_list.append(ev)
# P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs? # P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs?


if self._p == None: # p is uniform distribution as default.
if self._p is None: # p is uniform distribution as default.
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs] q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs]
# q_T_list = [q.T for q in q_list] # q_T_list = [q.T for q in q_list]


from itertools import combinations_with_replacement from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else: else:
iterator = itr iterator = itr
@@ -79,7 +79,7 @@ class SpectralDecomposition(RandomWalk):
def _compute_gm_imap_unordered(self): def _compute_gm_imap_unordered(self):
self._check_edge_weight(self._graphs)
self._check_edge_weight(self._graphs, self._verbose)
self._check_graphs(self._graphs) self._check_graphs(self._graphs)
if self._verbose >= 2: if self._verbose >= 2:
import warnings import warnings
@@ -88,7 +88,7 @@ class SpectralDecomposition(RandomWalk):
# compute Gram matrix. # compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
if self._q == None:
if self._q is None:
# precompute the spectral decomposition of each graph. # precompute the spectral decomposition of each graph.
P_list = [] P_list = []
D_list = [] D_list = []
@@ -104,7 +104,7 @@ class SpectralDecomposition(RandomWalk):
D_list.append(ew) D_list.append(ew)
P_list.append(ev) # @todo: parallel? P_list.append(ev) # @todo: parallel?


if self._p == None: # p is uniform distribution as default.
if self._p is None: # p is uniform distribution as default.
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs] # @todo: parallel? q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs] # @todo: parallel?
def init_worker(q_T_list_toshare, P_list_toshare, D_list_toshare): def init_worker(q_T_list_toshare, P_list_toshare, D_list_toshare):
@@ -126,7 +126,7 @@ class SpectralDecomposition(RandomWalk):
def _compute_kernel_list_series(self, g1, g_list): def _compute_kernel_list_series(self, g1, g_list):
self._check_edge_weight(g_list + [g1])
self._check_edge_weight(g_list + [g1], self._verbose)
self._check_graphs(g_list + [g1]) self._check_graphs(g_list + [g1])
if self._verbose >= 2: if self._verbose >= 2:
import warnings import warnings
@@ -135,16 +135,16 @@ class SpectralDecomposition(RandomWalk):
# compute kernel list. # compute kernel list.
kernel_list = [None] * len(g_list) kernel_list = [None] * len(g_list)
if self._q == None:
if self._q is None:
# precompute the spectral decomposition of each graph. # precompute the spectral decomposition of each graph.
A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
D1, P1 = np.linalg.eig(A1) D1, P1 = np.linalg.eig(A1)
P_list = [] P_list = []
D_list = [] D_list = []
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='spectral decompose', file=sys.stdout)
iterator = tqdm(g_list, desc='spectral decompose', file=sys.stdout)
else: else:
iterator = range(len(g_list))
iterator = g_list
for G in iterator: for G in iterator:
# don't normalize adjacency matrices if q is a uniform vector. Note # don't normalize adjacency matrices if q is a uniform vector. Note
# A actually is the transpose of the adjacency matrix. # A actually is the transpose of the adjacency matrix.
@@ -153,11 +153,11 @@ class SpectralDecomposition(RandomWalk):
D_list.append(ew) D_list.append(ew)
P_list.append(ev) P_list.append(ev)


if self._p == None: # p is uniform distribution as default.
if self._p is None: # p is uniform distribution as default.
q_T1 = 1 / nx.number_of_nodes(g1) q_T1 = 1 / nx.number_of_nodes(g1)
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list]
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else: else:
iterator = range(len(g_list)) iterator = range(len(g_list))
@@ -174,7 +174,7 @@ class SpectralDecomposition(RandomWalk):
def _compute_kernel_list_imap_unordered(self, g1, g_list): def _compute_kernel_list_imap_unordered(self, g1, g_list):
self._check_edge_weight(g_list + [g1])
self._check_edge_weight(g_list + [g1], self._verbose)
self._check_graphs(g_list + [g1]) self._check_graphs(g_list + [g1])
if self._verbose >= 2: if self._verbose >= 2:
import warnings import warnings
@@ -183,7 +183,7 @@ class SpectralDecomposition(RandomWalk):
# compute kernel list. # compute kernel list.
kernel_list = [None] * len(g_list) kernel_list = [None] * len(g_list)
if self._q == None:
if self._q is None:
# precompute the spectral decomposition of each graph. # precompute the spectral decomposition of each graph.
A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
D1, P1 = np.linalg.eig(A1) D1, P1 = np.linalg.eig(A1)
@@ -201,7 +201,7 @@ class SpectralDecomposition(RandomWalk):
D_list.append(ew) D_list.append(ew)
P_list.append(ev) # @todo: parallel? P_list.append(ev) # @todo: parallel?


if self._p == None: # p is uniform distribution as default.
if self._p is None: # p is uniform distribution as default.
q_T1 = 1 / nx.number_of_nodes(g1) q_T1 = 1 / nx.number_of_nodes(g1)
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] # @todo: parallel? q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] # @todo: parallel?
@@ -221,7 +221,7 @@ class SpectralDecomposition(RandomWalk):
itr = range(len(g_list)) itr = range(len(g_list))
len_itr = len(g_list) len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(q_T1, P1, D1, q_T_list, P_list, D_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
init_worker=init_worker, glbv=(q_T1, P1, D1, q_T_list, P_list, D_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
else: # @todo else: # @todo
pass pass
@@ -236,20 +236,20 @@ class SpectralDecomposition(RandomWalk):
def _compute_single_kernel_series(self, g1, g2): def _compute_single_kernel_series(self, g1, g2):
self._check_edge_weight([g1] + [g2])
self._check_edge_weight([g1] + [g2], self._verbose)
self._check_graphs([g1] + [g2]) self._check_graphs([g1] + [g2])
if self._verbose >= 2: if self._verbose >= 2:
import warnings import warnings
warnings.warn('All labels are ignored. Only works for undirected graphs.') warnings.warn('All labels are ignored. Only works for undirected graphs.')
if self._q == None:
if self._q is None:
# precompute the spectral decomposition of each graph. # precompute the spectral decomposition of each graph.
A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
D1, P1 = np.linalg.eig(A1) D1, P1 = np.linalg.eig(A1)
A2 = nx.adjacency_matrix(g2, self._edge_weight).todense().transpose() A2 = nx.adjacency_matrix(g2, self._edge_weight).todense().transpose()
D2, P2 = np.linalg.eig(A2) D2, P2 = np.linalg.eig(A2)


if self._p == None: # p is uniform distribution as default.
if self._p is None: # p is uniform distribution as default.
q_T1 = 1 / nx.number_of_nodes(g1) q_T1 = 1 / nx.number_of_nodes(g1)
q_T2 = 1 / nx.number_of_nodes(g2) q_T2 = 1 / nx.number_of_nodes(g2)
kernel = self.__kernel_do(q_T1, q_T2, P1, P2, D1, D2, self._weight, self._sub_kernel) kernel = self.__kernel_do(q_T1, q_T2, P1, P2, D1, D2, self._weight, self._sub_kernel)
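
Context for the decompositions above: for undirected graphs with uniform p = q, the spectral random-walk kernel needs only each graph's eigendecomposition, because the eigenvalues of a Kronecker product are the pairwise products of the factors' eigenvalues. A sketch of the geometric-series sub-kernel (the helper name is ours; weight must be small enough that weight * d1 * d2 < 1 for every eigenvalue product):

import numpy as np
import networkx as nx

def spectral_rw_kernel_geo(g1, g2, weight=1e-3):
    # transposed adjacency matrices, as A_wave in the code above
    A1 = np.asarray(nx.adjacency_matrix(g1).todense()).T
    A2 = np.asarray(nx.adjacency_matrix(g2).todense()).T
    D1, P1 = np.linalg.eig(A1)
    D2, P2 = np.linalg.eig(A2)
    n1, n2 = A1.shape[0], A2.shape[0]
    q_T1 = np.full((1, n1), 1 / n1)  # uniform stopping distribution
    q_T2 = np.full((1, n2), 1 / n2)
    # eigenvalues of kron(A1, A2) are all pairwise products d1 * d2
    D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
    k_middle = np.diag(1 / (1 - weight * D_diag))  # geometric series in closed form
    k_left = np.kron(q_T1 @ P1, q_T2 @ P2)
    return np.real(k_left @ k_middle @ k_left.T).item()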


+ 6
- 39
gklearn/kernels/structural_sp.py

@@ -18,7 +18,7 @@ from tqdm import tqdm
# import networkx as nx # import networkx as nx
import numpy as np import numpy as np
from gklearn.utils.parallel import parallel_gm, parallel_me from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.utils.utils import get_shortest_paths
from gklearn.utils.utils import get_shortest_paths, compute_vertex_kernels
from gklearn.kernels import GraphKernel from gklearn.kernels import GraphKernel




@@ -57,7 +57,7 @@ class StructuralSP(GraphKernel):
from itertools import combinations_with_replacement from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else: else:
iterator = itr iterator = itr
if self.__compute_method == 'trie': if self.__compute_method == 'trie':
@@ -135,7 +135,7 @@ class StructuralSP(GraphKernel):
# compute kernel list. # compute kernel list.
kernel_list = [None] * len(g_list) kernel_list = [None] * len(g_list)
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else: else:
iterator = range(len(g_list)) iterator = range(len(g_list))
if self.__compute_method == 'trie': if self.__compute_method == 'trie':
@@ -193,7 +193,7 @@ class StructuralSP(GraphKernel):
itr = range(len(g_list)) itr = range(len(g_list))
len_itr = len(g_list) len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
return kernel_list return kernel_list
@@ -273,7 +273,7 @@ class StructuralSP(GraphKernel):
if len(p1) == len(p2): if len(p1) == len(p2):
kernel += 1 kernel += 1
try: try:
kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average
kernel = kernel / (len(spl1) * len(spl2)) # Compute mean average
except ZeroDivisionError: except ZeroDivisionError:
print(spl1, spl2) print(spl1, spl2)
print(g1.nodes(data=True)) print(g1.nodes(data=True))
@@ -318,40 +318,7 @@ class StructuralSP(GraphKernel):
def __get_all_node_kernels(self, g1, g2): def __get_all_node_kernels(self, g1, g2):
# compute shortest path matrices, method borrowed from FCSP.
vk_dict = {} # shortest path matrices dict
if len(self.__node_labels) > 0:
# node symb and non-synb labeled
if len(self.__node_attrs) > 0:
kn = self.__node_kernels['mix']
for n1, n2 in product(g1.nodes(data=True), g2.nodes(data=True)):
n1_labels = [n1[1][nl] for nl in self.__node_labels]
n2_labels = [n2[1][nl] for nl in self.__node_labels]
n1_attrs = [n1[1][na] for na in self.__node_attrs]
n2_attrs = [n2[1][na] for na in self.__node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
# node symb labeled
else:
kn = self.__node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_labels = [n1[1][nl] for nl in self.__node_labels]
n2_labels = [n2[1][nl] for nl in self.__node_labels]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
else:
# node non-synb labeled
if len(self.__node_attrs) > 0:
kn = self.__node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_attrs = [n1[1][na] for na in self.__node_attrs]
n2_attrs = [n2[1][na] for na in self.__node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
# node unlabeled
else:
pass
return vk_dict
return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)
def __get_all_edge_kernels(self, g1, g2): def __get_all_edge_kernels(self, g1, g2):
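
The removed lines above collapse into the shared helper compute_vertex_kernels from gklearn.utils.utils. A simplified stand-in that shows the shape of its result, a dict of vertex-kernel values keyed by node pairs (delta kernel on symbolic labels only; the real helper also handles the 'mix' and 'nsymb' cases via the node_kernels dict):

from itertools import product

def vertex_kernels_symb(g1, g2, node_labels):
    vk_dict = {}
    for (u, du), (v, dv) in product(g1.nodes(data=True), g2.nodes(data=True)):
        u_labels = [du[nl] for nl in node_labels]
        v_labels = [dv[nl] for nl in node_labels]
        vk_dict[(u, v)] = 1.0 if u_labels == v_labels else 0.0  # Kronecker delta
    return vk_dict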


+ 13
- 13
gklearn/kernels/structuralspKernel.py

@@ -37,15 +37,15 @@ def structuralspkernel(*args,
n_jobs=None, n_jobs=None,
chunksize=None, chunksize=None,
verbose=True): verbose=True):
"""Calculate mean average structural shortest path kernels between graphs.
"""Compute mean average structural shortest path kernels between graphs.


Parameters Parameters
---------- ----------
Gn : List of NetworkX graph Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
G1, G2 : NetworkX graphs G1, G2 : NetworkX graphs
Two graphs between which the kernel is calculated.
Two graphs between which the kernel is computed.


node_label : string node_label : string
Node attribute used as label. The default node label is atom. Node attribute used as label. The default node label is atom.
@@ -215,7 +215,7 @@ def structuralspkernel(*args,
from itertools import combinations_with_replacement from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(Gn)), 2) itr = combinations_with_replacement(range(0, len(Gn)), 2)
if verbose: if verbose:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else: else:
iterator = itr iterator = itr
if compute_method == 'trie': if compute_method == 'trie':
@@ -241,7 +241,7 @@ def structuralspkernel(*args,
# combinations_with_replacement(splist, 2), # combinations_with_replacement(splist, 2),
# combinations_with_replacement(range(0, len(Gn)), 2)) # combinations_with_replacement(range(0, len(Gn)), 2))
# for i, j, kernel in tqdm( # for i, j, kernel in tqdm(
# pool.map(do_partial, itr), desc='calculating kernels',
# pool.map(do_partial, itr), desc='Computing kernels',
# file=sys.stdout): # file=sys.stdout):
# Kmatrix[i][j] = kernel # Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel # Kmatrix[j][i] = kernel
@@ -263,7 +263,7 @@ def structuralspkernel(*args,
# with closing(Pool(n_jobs)) as pool: # with closing(Pool(n_jobs)) as pool:
# for i, j, kernel in tqdm( # for i, j, kernel in tqdm(
# pool.imap_unordered(do_partial, itr, 1000), # pool.imap_unordered(do_partial, itr, 1000),
# desc='calculating kernels',
# desc='Computing kernels',
# file=sys.stdout): # file=sys.stdout):
# Kmatrix[i][j] = kernel # Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel # Kmatrix[j][i] = kernel
@@ -335,7 +335,7 @@ def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,
if len(p1) == len(p2): if len(p1) == len(p2):
kernel += 1 kernel += 1
try: try:
kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average
kernel = kernel / (len(spl1) * len(spl2)) # Compute mean average
except ZeroDivisionError: except ZeroDivisionError:
print(spl1, spl2) print(spl1, spl2)
print(g1.nodes(data=True)) print(g1.nodes(data=True))
@@ -429,7 +429,7 @@ def ssp_do_trie(g1, g2, trie1, trie2, ds_attrs, node_label, edge_label,
# # compute graph kernels # # compute graph kernels
# traverseBothTrie(trie1[0].root, trie2[0], kernel) # traverseBothTrie(trie1[0].root, trie2[0], kernel)
# #
# kernel = kernel[0] / (trie1[1] * trie2[1]) # calculate mean average
# kernel = kernel[0] / (trie1[1] * trie2[1]) # Compute mean average


# # traverse all paths in graph1. Deep-first search is applied. # # traverse all paths in graph1. Deep-first search is applied.
# def traverseBothTrie(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]): # def traverseBothTrie(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]):
@@ -485,7 +485,7 @@ def ssp_do_trie(g1, g2, trie1, trie2, ds_attrs, node_label, edge_label,
else: else:
traverseBothTrieu(trie1[0].root, trie2[0], kernel, vk_dict, ek_dict) traverseBothTrieu(trie1[0].root, trie2[0], kernel, vk_dict, ek_dict)


kernel = kernel[0] / (trie1[1] * trie2[1]) # calculate mean average
kernel = kernel[0] / (trie1[1] * trie2[1]) # Compute mean average


return kernel return kernel


@@ -781,9 +781,9 @@ def get_shortest_paths(G, weight, directed):
Parameters Parameters
---------- ----------
G : NetworkX graphs G : NetworkX graphs
The graphs whose paths are calculated.
The graphs whose paths are computed.
weight : string/None weight : string/None
edge attribute used as weight to calculate the shortest path.
edge attribute used as weight to compute the shortest path.
directed: boolean directed: boolean
Whether graph is directed. Whether graph is directed.


@@ -822,9 +822,9 @@ def get_sps_as_trie(G, weight, directed):
Parameters Parameters
---------- ----------
G : NetworkX graphs G : NetworkX graphs
The graphs whose paths are calculated.
The graphs whose paths are computed.
weight : string/None weight : string/None
edge attribute used as weight to calculate the shortest path.
edge attribute used as weight to compute the shortest path.
directed: boolean directed: boolean
Whether graph is directed. Whether graph is directed.
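
A minimal sketch of what get_shortest_paths collects for one graph in the unweighted, undirected case (shortest_paths_of is our name; the module's version also honours the weight argument and keeps both directions of each path):

import networkx as nx

def shortest_paths_of(G):
    sp = []
    for source, paths in nx.all_pairs_shortest_path(G):
        for target, path in paths.items():
            if source < target:  # keep each unordered node pair once
                sp.append(path)  # a path is a list of node ids
    return sp

print(len(shortest_paths_of(nx.path_graph(4))))  # 6 node pairs for 4 nodes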




+ 30
- 30
gklearn/kernels/sylvester_equation.py

@@ -16,18 +16,18 @@ import numpy as np
import networkx as nx import networkx as nx
from control import dlyap from control import dlyap
from gklearn.utils.parallel import parallel_gm, parallel_me from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.kernels import RandomWalk
from gklearn.kernels import RandomWalkMeta




class SylvesterEquation(RandomWalk):
class SylvesterEquation(RandomWalkMeta):
def __init__(self, **kwargs): def __init__(self, **kwargs):
RandomWalk.__init__(self, **kwargs)
super().__init__(**kwargs)


def _compute_gm_series(self): def _compute_gm_series(self):
self._check_edge_weight(self._graphs)
self._check_edge_weight(self._graphs, self._verbose)
self._check_graphs(self._graphs) self._check_graphs(self._graphs)
if self._verbose >= 2: if self._verbose >= 2:
import warnings import warnings
@@ -38,7 +38,7 @@ class SylvesterEquation(RandomWalk):
# compute Gram matrix. # compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
if self._q == None:
if self._q is None:
# don't normalize adjacency matrices if q is a uniform vector. Note # don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices. # A_wave_list actually contains the transposes of the adjacency matrices.
if self._verbose >= 2: if self._verbose >= 2:
@@ -54,16 +54,16 @@ class SylvesterEquation(RandomWalk):
# norm[norm == 0] = 1 # norm[norm == 0] = 1
# A_wave_list.append(A_tilde / norm) # A_wave_list.append(A_tilde / norm)


if self._p == None: # p is uniform distribution as default.
if self._p is None: # p is uniform distribution as default.
from itertools import combinations_with_replacement from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else: else:
iterator = itr iterator = itr
for i, j in iterator: for i, j in iterator:
kernel = self.__kernel_do(A_wave_list[i], A_wave_list[j], lmda)
kernel = self._kernel_do(A_wave_list[i], A_wave_list[j], lmda)
gram_matrix[i][j] = kernel gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel gram_matrix[j][i] = kernel
@@ -76,7 +76,7 @@ class SylvesterEquation(RandomWalk):
def _compute_gm_imap_unordered(self): def _compute_gm_imap_unordered(self):
self._check_edge_weight(self._graphs)
self._check_edge_weight(self._graphs, self._verbose)
self._check_graphs(self._graphs) self._check_graphs(self._graphs)
if self._verbose >= 2: if self._verbose >= 2:
import warnings import warnings
@@ -85,7 +85,7 @@ class SylvesterEquation(RandomWalk):
# compute Gram matrix. # compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
if self._q == None:
if self._q is None:
# don't normalize adjacency matrices if q is a uniform vector. Note # don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices. # A_wave_list actually contains the transposes of the adjacency matrices.
if self._verbose >= 2: if self._verbose >= 2:
@@ -94,7 +94,7 @@ class SylvesterEquation(RandomWalk):
iterator = self._graphs iterator = self._graphs
A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel? A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel?


if self._p == None: # p is uniform distribution as default.
if self._p is None: # p is uniform distribution as default.
def init_worker(A_wave_list_toshare): def init_worker(A_wave_list_toshare):
global G_A_wave_list global G_A_wave_list
G_A_wave_list = A_wave_list_toshare G_A_wave_list = A_wave_list_toshare
@@ -113,7 +113,7 @@ class SylvesterEquation(RandomWalk):
def _compute_kernel_list_series(self, g1, g_list): def _compute_kernel_list_series(self, g1, g_list):
self._check_edge_weight(g_list + [g1])
self._check_edge_weight(g_list + [g1], self._verbose)
self._check_graphs(g_list + [g1]) self._check_graphs(g_list + [g1])
if self._verbose >= 2: if self._verbose >= 2:
import warnings import warnings
@@ -124,24 +124,24 @@ class SylvesterEquation(RandomWalk):
# compute kernel list. # compute kernel list.
kernel_list = [None] * len(g_list) kernel_list = [None] * len(g_list)
if self._q == None:
if self._q is None:
# don't normalize adjacency matrices if q is a uniform vector. Note # don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices. # A_wave_list actually contains the transposes of the adjacency matrices.
A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='compute adjacency matrices', file=sys.stdout)
iterator = tqdm(g_list, desc='compute adjacency matrices', file=sys.stdout)
else: else:
iterator = range(len(g_list))
iterator = g_list
A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator]


if self._p == None: # p is uniform distribution as default.
if self._p is None: # p is uniform distribution as default.
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else: else:
iterator = range(len(g_list)) iterator = range(len(g_list))
for i in iterator: for i in iterator:
kernel = self.__kernel_do(A_wave_1, A_wave_list[i], lmda)
kernel = self._kernel_do(A_wave_1, A_wave_list[i], lmda)
kernel_list[i] = kernel kernel_list[i] = kernel
else: # @todo else: # @todo
@@ -153,7 +153,7 @@ class SylvesterEquation(RandomWalk):
def _compute_kernel_list_imap_unordered(self, g1, g_list): def _compute_kernel_list_imap_unordered(self, g1, g_list):
self._check_edge_weight(g_list + [g1])
self._check_edge_weight(g_list + [g1], self._verbose)
self._check_graphs(g_list + [g1]) self._check_graphs(g_list + [g1])
if self._verbose >= 2: if self._verbose >= 2:
import warnings import warnings
@@ -162,17 +162,17 @@ class SylvesterEquation(RandomWalk):
# compute kernel list. # compute kernel list.
kernel_list = [None] * len(g_list) kernel_list = [None] * len(g_list)
if self._q == None:
if self._q is None:
# don't normalize adjacency matrices if q is a uniform vector. Note # don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices. # A_wave_list actually contains the transposes of the adjacency matrices.
A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='compute adjacency matrices', file=sys.stdout)
iterator = tqdm(g_list, desc='compute adjacency matrices', file=sys.stdout)
else: else:
iterator = range(len(g_list))
iterator = g_list
A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel? A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel?


if self._p == None: # p is uniform distribution as default.
if self._p is None: # p is uniform distribution as default.
def init_worker(A_wave_1_toshare, A_wave_list_toshare): def init_worker(A_wave_1_toshare, A_wave_list_toshare):
global G_A_wave_1, G_A_wave_list global G_A_wave_1, G_A_wave_list
G_A_wave_1 = A_wave_1_toshare G_A_wave_1 = A_wave_1_toshare
@@ -186,7 +186,7 @@ class SylvesterEquation(RandomWalk):
len_itr = len(g_list) len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(A_wave_1, A_wave_list), method='imap_unordered', init_worker=init_worker, glbv=(A_wave_1, A_wave_list), method='imap_unordered',
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
else: # @todo else: # @todo
pass pass
@@ -201,7 +201,7 @@ class SylvesterEquation(RandomWalk):
def _compute_single_kernel_series(self, g1, g2): def _compute_single_kernel_series(self, g1, g2):
self._check_edge_weight([g1] + [g2])
self._check_edge_weight([g1] + [g2], self._verbose)
self._check_graphs([g1] + [g2]) self._check_graphs([g1] + [g2])
if self._verbose >= 2: if self._verbose >= 2:
import warnings import warnings
@@ -209,13 +209,13 @@ class SylvesterEquation(RandomWalk):
lmda = self._weight lmda = self._weight
if self._q == None:
if self._q is None:
# don't normalize adjacency matrices if q is a uniform vector. Note # don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices. # A_wave_list actually contains the transposes of the adjacency matrices.
A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
A_wave_2 = nx.adjacency_matrix(g2, self._edge_weight).todense().transpose() A_wave_2 = nx.adjacency_matrix(g2, self._edge_weight).todense().transpose()
if self._p == None: # p is uniform distribution as default.
kernel = self.__kernel_do(A_wave_1, A_wave_2, lmda)
if self._p is None: # p is uniform distribution as default.
kernel = self._kernel_do(A_wave_1, A_wave_2, lmda)
else: # @todo else: # @todo
pass pass
else: # @todo else: # @todo
@@ -224,7 +224,7 @@ class SylvesterEquation(RandomWalk):
return kernel return kernel
def __kernel_do(self, A_wave1, A_wave2, lmda):
def _kernel_do(self, A_wave1, A_wave2, lmda):
S = lmda * A_wave2 S = lmda * A_wave2
T_t = A_wave1 T_t = A_wave1
@@ -242,4 +242,4 @@ class SylvesterEquation(RandomWalk):
def _wrapper_kernel_do(self, itr): def _wrapper_kernel_do(self, itr):
i = itr[0] i = itr[0]
j = itr[1] j = itr[1]
return i, j, self.__kernel_do(G_A_wave_list[i], G_A_wave_list[j], self._weight)
return i, j, self._kernel_do(G_A_wave_list[i], G_A_wave_list[j], self._weight)
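
The renamed _kernel_do above solves a discrete Sylvester equation via control.dlyap. A sketch of that computation for uniform p and q, mirroring the S, T_t and lmda quantities visible in this hunk (assuming dlyap's three-argument Sylvester form A*X*Q^T - X + C = 0):

import numpy as np
import networkx as nx
from control import dlyap

def sylvester_rw_kernel(g1, g2, lmda=1e-3):
    # transposed adjacency matrices, as A_wave_1 / A_wave_2 above
    A1 = np.asarray(nx.adjacency_matrix(g1).todense()).T
    A2 = np.asarray(nx.adjacency_matrix(g2).todense()).T
    nb_pd = A1.shape[0] * A2.shape[0]
    S = lmda * A2
    T_t = A1
    M0 = np.full((A2.shape[0], A1.shape[0]), 1 / nb_pd)  # uniform p
    X = dlyap(S, T_t, M0)  # solves S * X * T_t^T - X + M0 = 0
    x = np.reshape(X, (-1, 1), order='F')  # vectorise the solution
    q_times = np.full((1, nb_pd), 1 / nb_pd)  # uniform q
    return (q_times @ x).item()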

+ 8
- 8
gklearn/kernels/treelet.py

@@ -39,7 +39,7 @@ class Treelet(GraphKernel):
def _compute_gm_series(self): def _compute_gm_series(self):
self.__add_dummy_labels(self._graphs) self.__add_dummy_labels(self._graphs)
# get all canonical keys of all graphs before calculating kernels to save
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset. # time, but this may cost a lot of memory for large dataset.
canonkeys = [] canonkeys = []
if self._verbose >= 2: if self._verbose >= 2:
@@ -55,7 +55,7 @@ class Treelet(GraphKernel):
from itertools import combinations_with_replacement from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else: else:
iterator = itr iterator = itr
for i, j in iterator: for i, j in iterator:
@@ -69,7 +69,7 @@ class Treelet(GraphKernel):
def _compute_gm_imap_unordered(self): def _compute_gm_imap_unordered(self):
self.__add_dummy_labels(self._graphs) self.__add_dummy_labels(self._graphs)
# get all canonical keys of all graphs before calculating kernels to save
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset. # time, but this may cost a lot of memory for large dataset.
pool = Pool(self._n_jobs) pool = Pool(self._n_jobs)
itr = zip(self._graphs, range(0, len(self._graphs))) itr = zip(self._graphs, range(0, len(self._graphs)))
@@ -105,7 +105,7 @@ class Treelet(GraphKernel):
def _compute_kernel_list_series(self, g1, g_list): def _compute_kernel_list_series(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1]) self.__add_dummy_labels(g_list + [g1])
# get all canonical keys of all graphs before calculating kernels to save
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset. # time, but this may cost a lot of memory for large dataset.
canonkeys_1 = self.__get_canonkeys(g1) canonkeys_1 = self.__get_canonkeys(g1)
canonkeys_list = [] canonkeys_list = []
@@ -119,7 +119,7 @@ class Treelet(GraphKernel):
# compute kernel list. # compute kernel list.
kernel_list = [None] * len(g_list) kernel_list = [None] * len(g_list)
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else: else:
iterator = range(len(g_list)) iterator = range(len(g_list))
for i in iterator: for i in iterator:
@@ -132,7 +132,7 @@ class Treelet(GraphKernel):
def _compute_kernel_list_imap_unordered(self, g1, g_list): def _compute_kernel_list_imap_unordered(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1]) self.__add_dummy_labels(g_list + [g1])
# get all canonical keys of all graphs before calculating kernels to save
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset. # time, but this may cost a lot of memory for large dataset.
canonkeys_1 = self.__get_canonkeys(g1) canonkeys_1 = self.__get_canonkeys(g1)
canonkeys_list = [[] for _ in range(len(g_list))] canonkeys_list = [[] for _ in range(len(g_list))]
@@ -167,7 +167,7 @@ class Treelet(GraphKernel):
len_itr = len(g_list) len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered', init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered',
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
return kernel_list return kernel_list
@@ -185,7 +185,7 @@ class Treelet(GraphKernel):
def __kernel_do(self, canonkey1, canonkey2): def __kernel_do(self, canonkey1, canonkey2):
"""Calculate treelet graph kernel between 2 graphs.
"""Compute treelet graph kernel between 2 graphs.
Parameters Parameters
---------- ----------
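
For context on the canonical keys above: each graph is summarised as a dict mapping a canonical treelet key to its occurrence count, and the kernel compares the two count vectors only on the keys both graphs share. A sketch with a Gaussian sub-kernel (gamma is a hypothetical choice; the module takes sub_kernel as a parameter):

import numpy as np

def treelet_kernel(canonkey1, canonkey2, gamma=1.0):
    keys = set(canonkey1) & set(canonkey2)  # treelets present in both graphs
    v1 = np.array([canonkey1[k] for k in keys])
    v2 = np.array([canonkey2[k] for k in keys])
    return np.exp(-gamma * np.sum((v1 - v2) ** 2))  # Gaussian sub-kernel

print(treelet_kernel({'0': 4, '1': 3}, {'0': 3, '1': 3, '2': 1}))  # exp(-1)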


+ 8
- 8
gklearn/kernels/treeletKernel.py

@@ -29,15 +29,15 @@ def treeletkernel(*args,
n_jobs=None, n_jobs=None,
chunksize=None, chunksize=None,
verbose=True): verbose=True):
"""Calculate treelet graph kernels between graphs.
"""Compute treelet graph kernels between graphs.


Parameters Parameters
---------- ----------
Gn : List of NetworkX graph Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
G1, G2 : NetworkX graphs G1, G2 : NetworkX graphs
Two graphs between which the kernel is calculated.
Two graphs between which the kernel is computed.


sub_kernel : function sub_kernel : function
The sub-kernel between 2 real number vectors. Each vector counts the The sub-kernel between 2 real number vectors. Each vector counts the
@@ -89,7 +89,7 @@ def treeletkernel(*args,
# ---- use pool.imap_unordered to parallel and track progress. ---- # ---- use pool.imap_unordered to parallel and track progress. ----
if parallel == 'imap_unordered': if parallel == 'imap_unordered':
# get all canonical keys of all graphs before calculating kernels to save
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset. # time, but this may cost a lot of memory for large dataset.
pool = Pool(n_jobs) pool = Pool(n_jobs)
itr = zip(Gn, range(0, len(Gn))) itr = zip(Gn, range(0, len(Gn)))
@@ -120,8 +120,8 @@ def treeletkernel(*args,
glbv=(canonkeys,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) glbv=(canonkeys,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)
# ---- do not use parallelization. ---- # ---- do not use parallelization. ----
elif parallel == None:
# get all canonical keys of all graphs before calculating kernels to save
elif parallel is None:
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset. # time, but this may cost a lot of memory for large dataset.
canonkeys = [] canonkeys = []
for g in (tqdm(Gn, desc='getting canonkeys', file=sys.stdout) if verbose else Gn): for g in (tqdm(Gn, desc='getting canonkeys', file=sys.stdout) if verbose else Gn):
@@ -148,7 +148,7 @@ def treeletkernel(*args,




def _treeletkernel_do(canonkey1, canonkey2, sub_kernel): def _treeletkernel_do(canonkey1, canonkey2, sub_kernel):
"""Calculate treelet graph kernel between 2 graphs.
"""Compute treelet graph kernel between 2 graphs.
Parameters Parameters
---------- ----------
@@ -210,7 +210,7 @@ def get_canonkeys(G, node_label, edge_label, labeled, is_directed):


# n-star patterns # n-star patterns
patterns['3star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3] patterns['3star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3]
patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4]
patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4] # @todo: check self loop.
patterns['5star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5] patterns['5star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5]
# n-star patterns # n-star patterns
canonkey['6'] = len(patterns['3star']) canonkey['6'] = len(patterns['3star'])
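
On the new '@todo: check self loop.' above: G.degree counts a self-loop twice, so the degree test can mislabel stars. A sketch that filters self-loops before testing (n_stars is our name):

import networkx as nx

def n_stars(G, n):
    stars = []
    for node in G.nodes():
        neighbors = [nb for nb in G[node] if nb != node]  # drop self-loops
        if len(neighbors) == n:
            stars.append([node] + neighbors)
    return stars

G = nx.star_graph(3)       # one centre with three leaves
G.add_edge(0, 0)           # a self-loop bumps G.degree(0) to 5
print(len(n_stars(G, 3)))  # still finds the 3-star: 1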


+ 17
- 17
gklearn/kernels/untilHPathKernel.py

@@ -34,15 +34,15 @@ def untilhpathkernel(*args,
n_jobs=None, n_jobs=None,
chunksize=None, chunksize=None,
verbose=True): verbose=True):
"""Calculate path graph kernels up to depth/hight h between graphs.
"""Compute path graph kernels up to depth/hight h between graphs.
Parameters Parameters
---------- ----------
Gn : List of NetworkX graph Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
G1, G2 : NetworkX graphs G1, G2 : NetworkX graphs
Two graphs between which the kernel is calculated.
Two graphs between which the kernel is computed.


node_label : string node_label : string
Node attribute used as label. The default node label is atom. Node attribute used as label. The default node label is atom.
@@ -91,7 +91,7 @@ def untilhpathkernel(*args,
attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled', attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled',
'edge_attr_dim', 'is_directed'], 'edge_attr_dim', 'is_directed'],
node_label=node_label, edge_label=edge_label) node_label=node_label, edge_label=edge_label)
if k_func != None:
if k_func is not None:
if not ds_attrs['node_labeled']: if not ds_attrs['node_labeled']:
for G in Gn: for G in Gn:
nx.set_node_attributes(G, '0', 'atom') nx.set_node_attributes(G, '0', 'atom')
@@ -103,7 +103,7 @@ def untilhpathkernel(*args,


if parallel == 'imap_unordered': if parallel == 'imap_unordered':
# ---- use pool.imap_unordered to parallel and track progress. ---- # ---- use pool.imap_unordered to parallel and track progress. ----
# get all paths of all graphs before calculating kernels to save time,
# get all paths of all graphs before computing kernels to save time,
# but this may cost a lot of memory for large datasets. # but this may cost a lot of memory for large datasets.
pool = Pool(n_jobs) pool = Pool(n_jobs)
itr = zip(Gn, range(0, len(Gn))) itr = zip(Gn, range(0, len(Gn)))
@@ -113,10 +113,10 @@ def untilhpathkernel(*args,
else: else:
chunksize = 100 chunksize = 100
all_paths = [[] for _ in range(len(Gn))] all_paths = [[] for _ in range(len(Gn))]
if compute_method == 'trie' and k_func != None:
if compute_method == 'trie' and k_func is not None:
getps_partial = partial(wrapper_find_all_path_as_trie, depth, getps_partial = partial(wrapper_find_all_path_as_trie, depth,
ds_attrs, node_label, edge_label) ds_attrs, node_label, edge_label)
elif compute_method != 'trie' and k_func != None:
elif compute_method != 'trie' and k_func is not None:
getps_partial = partial(wrapper_find_all_paths_until_length, depth, getps_partial = partial(wrapper_find_all_paths_until_length, depth,
ds_attrs, node_label, edge_label, True) ds_attrs, node_label, edge_label, True)
else: else:
@@ -133,9 +133,9 @@ def untilhpathkernel(*args,
pool.join() pool.join()
# for g in Gn: # for g in Gn:
# if compute_method == 'trie' and k_func != None:
# if compute_method == 'trie' and k_func is not None:
# find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label) # find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label)
# elif compute_method != 'trie' and k_func != None:
# elif compute_method != 'trie' and k_func is not None:
# find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label) # find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label)
# else: # else:
# find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label, False) # find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label, False)
@@ -155,14 +155,14 @@ def untilhpathkernel(*args,
## all_paths[i] = ps ## all_paths[i] = ps
## print(time.time() - ttt) ## print(time.time() - ttt)
if compute_method == 'trie' and k_func != None:
if compute_method == 'trie' and k_func is not None:
def init_worker(trie_toshare): def init_worker(trie_toshare):
global G_trie global G_trie
G_trie = trie_toshare G_trie = trie_toshare
do_partial = partial(wrapper_uhpath_do_trie, k_func) do_partial = partial(wrapper_uhpath_do_trie, k_func)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)
elif compute_method != 'trie' and k_func != None:
elif compute_method != 'trie' and k_func is not None:
def init_worker(plist_toshare): def init_worker(plist_toshare):
global G_plist global G_plist
G_plist = plist_toshare G_plist = plist_toshare
@@ -177,7 +177,7 @@ def untilhpathkernel(*args,
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)
elif parallel == None:
elif parallel is None:
# from pympler import asizeof # from pympler import asizeof
# ---- direct running, normally use single CPU core. ---- # ---- direct running, normally use single CPU core. ----
# print(asizeof.asized(all_paths, detail=1).format()) # print(asizeof.asized(all_paths, detail=1).format())
@@ -195,7 +195,7 @@ def untilhpathkernel(*args,
# print(sizeof_allpaths) # print(sizeof_allpaths)
pbar = tqdm( pbar = tqdm(
total=((len(Gn) + 1) * len(Gn) / 2), total=((len(Gn) + 1) * len(Gn) / 2),
desc='calculating kernels',
desc='Computing kernels',
file=sys.stdout) file=sys.stdout)
for i in range(0, len(Gn)): for i in range(0, len(Gn)):
for j in range(i, len(Gn)): for j in range(i, len(Gn)):
@@ -217,7 +217,7 @@ def untilhpathkernel(*args,
# print(sizeof_allpaths) # print(sizeof_allpaths)
pbar = tqdm( pbar = tqdm(
total=((len(Gn) + 1) * len(Gn) / 2), total=((len(Gn) + 1) * len(Gn) / 2),
desc='calculating kernels',
desc='Computing kernels',
file=sys.stdout) file=sys.stdout)
for i in range(0, len(Gn)): for i in range(0, len(Gn)):
for j in range(i, len(Gn)): for j in range(i, len(Gn)):
@@ -236,7 +236,7 @@ def untilhpathkernel(*args,




def _untilhpathkernel_do_trie(trie1, trie2, k_func): def _untilhpathkernel_do_trie(trie1, trie2, k_func):
"""Calculate path graph kernels up to depth d between 2 graphs using trie.
"""Compute path graph kernels up to depth d between 2 graphs using trie.


Parameters Parameters
---------- ----------
@@ -351,7 +351,7 @@ def wrapper_uhpath_do_trie(k_func, itr):


def _untilhpathkernel_do_naive(paths1, paths2, k_func): def _untilhpathkernel_do_naive(paths1, paths2, k_func):
"""Calculate path graph kernels up to depth d between 2 graphs naively.
"""Compute path graph kernels up to depth d between 2 graphs naively.


Parameters Parameters
---------- ----------
@@ -400,7 +400,7 @@ def wrapper_uhpath_do_naive(k_func, itr):




def _untilhpathkernel_do_kernelless(paths1, paths2, k_func): def _untilhpathkernel_do_kernelless(paths1, paths2, k_func):
"""Calculate path graph kernels up to depth d between 2 graphs naively.
"""Compute path graph kernels up to depth d between 2 graphs naively.


Parameters Parameters
---------- ----------
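
A compact sketch of what the naive (non-trie) variant computes: every label sequence of a path with at most h edges, compared with the MinMax sub-kernel, one of the k_func options this module accepts (the helper names and the 'atom' node label are assumptions; paths are counted from both endpoints in this simplified version):

from collections import Counter
import networkx as nx

def paths_up_to_h(G, h, node_label='atom'):
    paths = Counter()
    for u in G.nodes():
        stack = [[u]]
        while stack:
            path = stack.pop()
            paths[tuple(G.nodes[v][node_label] for v in path)] += 1
            if len(path) <= h:  # extend until the path has h edges
                stack.extend(path + [w] for w in G[path[-1]] if w not in path)
    return paths

def minmax_kernel(p1, p2):
    keys = set(p1) | set(p2)
    vmin = sum(min(p1[k], p2[k]) for k in keys)
    vmax = sum(max(p1[k], p2[k]) for k in keys)
    return vmin / vmax if vmax else 0.0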


+ 17
- 17
gklearn/kernels/weisfeilerLehmanKernel.py

@@ -32,15 +32,15 @@ def weisfeilerlehmankernel(*args,
n_jobs=None, n_jobs=None,
chunksize=None, chunksize=None,
verbose=True): verbose=True):
"""Calculate Weisfeiler-Lehman kernels between graphs.
"""Compute Weisfeiler-Lehman kernels between graphs.
Parameters Parameters
---------- ----------
Gn : List of NetworkX graph Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
G1, G2 : NetworkX graphs G1, G2 : NetworkX graphs
Two graphs between which the kernel is calculated.
Two graphs between which the kernel is computed.


node_label : string node_label : string
Node attribute used as label. The default node label is atom. Node attribute used as label. The default node label is atom.
@@ -115,12 +115,12 @@ def weisfeilerlehmankernel(*args,




def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksize, verbose): def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksize, verbose):
"""Calculate Weisfeiler-Lehman kernels between graphs.
"""Compute Weisfeiler-Lehman kernels between graphs.


Parameters Parameters
---------- ----------
Gn : List of NetworkX graph Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
node_label : string node_label : string
node attribute used as label. node attribute used as label.
edge_label : string edge_label : string
@@ -146,7 +146,7 @@ def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksiz
# number of occurrences of each label in G
all_num_of_each_label.append(dict(Counter(labels_ori))) all_num_of_each_label.append(dict(Counter(labels_ori)))


# calculate subtree kernel with the 0th iteration and add it to the final kernel
# Compute subtree kernel with the 0th iteration and add it to the final kernel
compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, False) compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, False)


# iterate each height # iterate each height
@@ -255,7 +255,7 @@ def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksiz
# all_labels_ori.update(labels_comp) # all_labels_ori.update(labels_comp)
all_num_of_each_label.append(dict(Counter(labels_comp)))

-	# calculate subtree kernel with h iterations and add it to the final kernel
+	# Compute subtree kernel with h iterations and add it to the final kernel
compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, False)

return Kmatrix

@@ -316,7 +316,7 @@ def compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs,
do_partial = partial(wrapper_compute_subtree_kernel, Kmatrix)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(all_num_of_each_label,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)
-	elif parallel == None:
+	elif parallel is None:
for i in range(len(Kmatrix)):
for j in range(i, len(Kmatrix)):
Kmatrix[i][j] = compute_subtree_kernel(all_num_of_each_label[i],

@@ -345,12 +345,12 @@ def wrapper_compute_subtree_kernel(Kmatrix, itr):


def _wl_spkernel_do(Gn, node_label, edge_label, height):
-	"""Calculate Weisfeiler-Lehman shortest path kernels between graphs.
+	"""Compute Weisfeiler-Lehman shortest path kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
-		List of graphs between which the kernels are calculated.
+		List of graphs between which the kernels are computed.
node_label : string
node attribute used as label.
edge_label : string

@@ -413,7 +413,7 @@ def _wl_spkernel_do(Gn, node_label, edge_label, height):
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
-		# calculate subtree kernel with h iterations and add it to the final kernel
+		# Compute subtree kernel with h iterations and add it to the final kernel
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data = True):

@@ -427,12 +427,12 @@ def _wl_spkernel_do(Gn, node_label, edge_label, height):


def _wl_edgekernel_do(Gn, node_label, edge_label, height):
-	"""Calculate Weisfeiler-Lehman edge kernels between graphs.
+	"""Compute Weisfeiler-Lehman edge kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
-		List of graphs between which the kernels are calculated.
+		List of graphs between which the kernels are computed.
node_label : string
node attribute used as label.
edge_label : string

@@ -491,7 +491,7 @@ def _wl_edgekernel_do(Gn, node_label, edge_label, height):
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
-		# calculate subtree kernel with h iterations and add it to the final kernel
+		# Compute subtree kernel with h iterations and add it to the final kernel
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data = True):

@@ -504,12 +504,12 @@ def _wl_edgekernel_do(Gn, node_label, edge_label, height):


def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):
-	"""Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs.
+	"""Compute Weisfeiler-Lehman kernels based on user-defined kernel between graphs.

Parameters
----------
Gn : List of NetworkX graph
-		List of graphs between which the kernels are calculated.
+		List of graphs between which the kernels are computed.
node_label : string
node attribute used as label.
edge_label : string

@@ -564,7 +564,7 @@ def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
-		# calculate kernel with h iterations and add it to the final kernel
+		# Compute kernel with h iterations and add it to the final kernel
Kmatrix += base_kernel(Gn, node_label, edge_label)
return Kmatrix
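These hunks only swap "calculate" for "compute", but the code they touch is the heart of the WL subtree kernel: at each height every node label is replaced by a compressed multiset of its neighbours' labels, the per-graph label counts are gathered with Counter, and each kernel entry gains the dot product of the two graphs' count vectors. A minimal, self-contained sketch of that loop follows; it uses naive string compression instead of gklearn's label dictionary, so it is illustrative only, not the library's implementation:

# Sketch of the Weisfeiler-Lehman subtree iteration mirrored by the diff
# above: relabel, count labels, accumulate the dot product of counts.
# Assumes NetworkX graphs with a 'label' attribute on every node.
import networkx as nx
from collections import Counter

def wl_relabel_once(g, label='label'):
    """Replace each node label by a (string-compressed) neighbour multiset."""
    new_labels = {}
    for v in g.nodes():
        neigh = sorted(g.nodes[u][label] for u in g.neighbors(v))
        new_labels[v] = g.nodes[v][label] + '|' + ','.join(neigh)
    nx.set_node_attributes(g, new_labels, label)

def subtree_term(g1, g2, label='label'):
    """Kernel contribution of the current labels: dot product of counts."""
    c1 = Counter(nx.get_node_attributes(g1, label).values())
    c2 = Counter(nx.get_node_attributes(g2, label).values())
    return sum(c1[k] * c2[k] for k in c1.keys() & c2.keys())

g1 = nx.path_graph(3)
g2 = nx.path_graph(4)
for g in (g1, g2):
    nx.set_node_attributes(g, {v: 'a' for v in g}, 'label')

k = subtree_term(g1, g2)    # height 0 contribution
for _ in range(2):          # two WL iterations
    wl_relabel_once(g1)
    wl_relabel_once(g2)
    k += subtree_term(g1, g2)
print(k)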

+ 13  - 13    gklearn/kernels/weisfeiler_lehman.py

@@ -125,12 +125,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def __subtree_kernel_do(self, Gn):
-		"""Calculate Weisfeiler-Lehman kernels between graphs.
+		"""Compute Weisfeiler-Lehman kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
-		List of graphs between which the kernels are calculated.
+		List of graphs between which the kernels are computed.

Return
------

@@ -152,7 +152,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
# number of occurence of each label in G
all_num_of_each_label.append(dict(Counter(labels_ori)))
-		# calculate subtree kernel with the 0th iteration and add it to the final kernel.
+		# Compute subtree kernel with the 0th iteration and add it to the final kernel.
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)

# iterate each height

@@ -198,7 +198,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
# all_labels_ori.update(labels_comp)
all_num_of_each_label.append(dict(Counter(labels_comp)))
-		# calculate subtree kernel with h iterations and add it to the final kernel
+		# Compute subtree kernel with h iterations and add it to the final kernel
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)

return gram_matrix

@@ -244,12 +244,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def _wl_spkernel_do(Gn, node_label, edge_label, height):
-		"""Calculate Weisfeiler-Lehman shortest path kernels between graphs.
+		"""Compute Weisfeiler-Lehman shortest path kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
-		List of graphs between which the kernels are calculated.
+		List of graphs between which the kernels are computed.
node_label : string
node attribute used as label.
edge_label : string

@@ -312,7 +312,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
-		# calculate subtree kernel with h iterations and add it to the final kernel
+		# Compute subtree kernel with h iterations and add it to the final kernel
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data = True):

@@ -326,12 +326,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def _wl_edgekernel_do(Gn, node_label, edge_label, height):
-		"""Calculate Weisfeiler-Lehman edge kernels between graphs.
+		"""Compute Weisfeiler-Lehman edge kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
-		List of graphs between which the kernels are calculated.
+		List of graphs between which the kernels are computed.
node_label : string
node attribute used as label.
edge_label : string

@@ -390,7 +390,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
-		# calculate subtree kernel with h iterations and add it to the final kernel
+		# Compute subtree kernel with h iterations and add it to the final kernel
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data = True):

@@ -403,12 +403,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):
-		"""Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs.
+		"""Compute Weisfeiler-Lehman kernels based on user-defined kernel between graphs.

Parameters
----------
Gn : List of NetworkX graph
-		List of graphs between which the kernels are calculated.
+		List of graphs between which the kernels are computed.
node_label : string
node attribute used as label.
edge_label : string

@@ -463,7 +463,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
-		# calculate kernel with h iterations and add it to the final kernel
+		# Compute kernel with h iterations and add it to the final kernel
gram_matrix += base_kernel(Gn, node_label, edge_label)
return gram_matrix


+ 54  - 5    gklearn/utils/dataset.py

@@ -13,6 +13,7 @@ import os


class Dataset(object):
def __init__(self, filename=None, filename_targets=None, **kwargs):
if filename is None:
self.__graphs = None

@@ -180,13 +181,13 @@ class Dataset(object):
# return 0

-	def get_dataset_infos(self, keys=None):
+	def get_dataset_infos(self, keys=None, params=None):
"""Computes and returns the structure and property information of the graph dataset.

Parameters
----------
-	keys : list
-		List of strings which indicate which informations will be returned. The
+	keys : list, optional
+		A list of strings which indicate which informations will be returned. The
possible choices includes:

'substructures': sub-structures graphs contains, including 'linear', 'non

@@ -241,7 +242,15 @@ class Dataset(object):
'class_number': number of classes. Only available for classification problems.
+	'all_degree_entropy': the entropy of degree distribution of each graph.
+	'ave_degree_entropy': the average entropy of degree distribution of all graphs.

All informations above will be returned if `keys` is not given.
+	params : dict of dict, optional
+		A dictionary which contains extra parameters for each possible
+		element in ``keys``.

Return
------

@@ -276,6 +285,8 @@ class Dataset(object):
'node_attr_dim',
'edge_attr_dim',
'class_number',
+	'all_degree_entropy',
+	'ave_degree_entropy'
]

# dataset size

@@ -420,6 +431,22 @@ class Dataset(object):
self.__edge_attr_dim = self.__get_edge_attr_dim()
infos['edge_attr_dim'] = self.__edge_attr_dim

+	# entropy of degree distribution.
+	if 'all_degree_entropy' in keys:
+		if params is not None and ('all_degree_entropy' in params) and ('base' in params['all_degree_entropy']):
+			base = params['all_degree_entropy']['base']
+		else:
+			base = None
+		infos['all_degree_entropy'] = self.__compute_all_degree_entropy(base=base)
+	if 'ave_degree_entropy' in keys:
+		if params is not None and ('ave_degree_entropy' in params) and ('base' in params['ave_degree_entropy']):
+			base = params['ave_degree_entropy']['base']
+		else:
+			base = None
+		infos['ave_degree_entropy'] = np.mean(self.__compute_all_degree_entropy(base=base))

return infos

@@ -653,8 +680,7 @@ class Dataset(object):
def __get_all_fill_factors(self):
-		"""
-		Get fill factor, the number of non-zero entries in the adjacency matrix.
+		"""Get fill factor, the number of non-zero entries in the adjacency matrix.

Returns
-------

@@ -721,7 +747,30 @@ class Dataset(object):
def __get_edge_attr_dim(self):
return len(self.__edge_attrs)

+	def __compute_all_degree_entropy(self, base=None):
+		"""Compute the entropy of degree distribution of each graph.
+
+		Parameters
+		----------
+		base : float, optional
+			The logarithmic base to use. The default is ``e`` (natural logarithm).
+
+		Returns
+		-------
+		degree_entropy : list
+			The computed entropy of each graph.
+		"""
+		from gklearn.utils.stats import entropy
+		degree_entropy = []
+		for g in self.__graphs:
+			degrees = list(dict(g.degree()).values())
+			en = entropy(degrees, base=base)
+			degree_entropy.append(en)
+		return degree_entropy

@property
def graphs(self):
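Taken together, these dataset.py hunks let callers thread per-key options through the new params argument; for the two entropy keys the only option read is the logarithm base, forwarded to gklearn.utils.stats.entropy per graph. A usage sketch (the dataset path is a hypothetical stand-in for any file Dataset can load):

# Sketch of querying the new degree-entropy infos; the path below is
# hypothetical, standing in for any dataset file Dataset accepts.
from gklearn.utils.dataset import Dataset

ds = Dataset('../datasets/MAO/dataset.ds')
infos = ds.get_dataset_infos(
    keys=['all_degree_entropy', 'ave_degree_entropy'],
    params={'all_degree_entropy': {'base': 2},    # entropies in bits
            'ave_degree_entropy': {'base': 2}})
print(infos['ave_degree_entropy'])    # mean over the per-graph entropies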


+ 52  - 0    gklearn/utils/math.py

@@ -0,0 +1,52 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 7 14:43:36 2020

@author: ljia
"""

def rounder(x, decimals):
"""Round, where 5 is rounded up.

Parameters
----------
x : float
The number to be rounded.
decimals : int
Decimals to which ``x`` is rounded.

Returns
-------
string
The rounded number.
"""
x_strs = str(x).split('.')
if len(x_strs) == 2:
before = x_strs[0]
after = x_strs[1]
if len(after) > decimals:
if int(after[decimals]) >= 5:
after0s = ''
for c in after:
if c == '0':
after0s += '0'
elif c != '0':
break
if len(after0s) == decimals:
after0s = after0s[:-1]
after = after0s + str(int(after[0:decimals]) + 1)[-decimals:]
else:
after = after[0:decimals]
elif len(after) < decimals:
after += '0' * (decimals - len(after))
return before + '.' + after

elif len(x_strs) == 1:
return x_strs[0]
if __name__ == '__main__':
x = 1.0075333616
y = rounder(x, 2)
print(y)
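One edge case of rounder's string surgery is worth flagging: when rounding up overflows the fractional part, the carry never reaches the integer digits, because only the substring after the decimal point is rewritten (e.g. 1.996 at 2 decimals yields '1.00'). A minimal alternative sketch with the standard decimal module keeps the half-up behaviour and the carry; this is a suggested variant, not part of the diff:

# Half-up rounding via decimal: a sketch equivalent to rounder(), but
# carrying fractional overflow into the integer part (1.996 -> '2.00').
from decimal import Decimal, ROUND_HALF_UP

def rounder_decimal(x, decimals):
    quantum = Decimal(1).scaleb(-decimals)    # e.g. Decimal('0.01')
    return str(Decimal(str(x)).quantize(quantum, rounding=ROUND_HALF_UP))

print(rounder_decimal(1.0075333616, 2))    # '1.01', same as rounder()
print(rounder_decimal(1.996, 2))           # '2.00'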

+ 918  - 916    gklearn/utils/model_selection_precomputed.py    (file diff suppressed because it is too large)


+ 1  - 1    gklearn/utils/parallel.py

@@ -63,4 +63,4 @@ def parallel_gm(func, Kmatrix, Gn, init_worker=None, glbv=None,
len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
parallel_me(func, func_assign, Kmatrix, itr, len_itr=len_itr,
init_worker=init_worker, glbv=glbv, method=method, n_jobs=n_jobs,
-	chunksize=chunksize, itr_desc='calculating kernels', verbose=verbose)
+	chunksize=chunksize, itr_desc='Computing kernels', verbose=verbose)
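For context, len_itr here is simply the size of the Gram matrix's upper triangle, n(n+1)/2 for n graphs, i.e. how many (i, j) pairs with i <= j the workers consume. A quick sanity check (a sketch, not gklearn's own iterator):

# The upper-triangle pair count that len_itr precomputes.
from itertools import combinations_with_replacement

n = 4
pairs = list(combinations_with_replacement(range(n), 2))
assert len(pairs) == n * (n + 1) // 2    # 10 pairs for 4 graphs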

+ 27  - 0    gklearn/utils/stats.py

@@ -0,0 +1,27 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 5 15:12:41 2020

@author: ljia
"""
from collections import Counter
from scipy import stats


def entropy(labels, base=None):
"""Calculate the entropy of a distribution for given list of labels.

Parameters
----------
labels : list
Given list of labels.
base : float, optional
The logarithmic base to use. The default is ``e`` (natural logarithm).

Returns
-------
float
The calculated entropy.
"""
return stats.entropy(list(Counter(labels).values()), base=base)
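Since scipy.stats.entropy normalises the counts to a probability distribution, this helper is just the Shannon entropy of the empirical label distribution. A quick check of the new function:

from gklearn.utils.stats import entropy

# Counts are [2, 1, 1] -> probabilities [0.5, 0.25, 0.25], so the
# base-2 entropy is 0.5*1 + 0.25*2 + 0.25*2 = 1.5 bits.
print(entropy(['a', 'a', 'b', 'c'], base=2))    # 1.5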

+ 80  - 0    gklearn/utils/utils.py

@@ -565,6 +565,86 @@ def compute_distance_matrix(gram_matrix):
return dis_mat, dis_max, dis_min, dis_mean




# @todo: use it in ShortestPath.
def compute_vertex_kernels(g1, g2, node_kernels, node_labels=[], node_attrs=[]):
"""Compute kernels between each pair of vertices in two graphs.

Parameters
----------
g1, g2 : NetworkX graph
The kernels between pairs of vertices in these two graphs are computed.
node_kernels : dict
A dictionary of kernel functions for nodes, including 3 items: 'symb'
for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix'
for both labels. The first 2 functions take two node labels as
parameters, and the 'mix' function takes 4 parameters, a symbolic and a
non-symbolic label for each of the two nodes. Each label is in the form of
a 2-D array (n_samples, n_features). Each function returns a number as the
kernel value. Ignored when nodes are unlabeled. This argument is intended
for the conjugate gradient method and fixed-point iterations.
node_labels : list, optional
The list of the name strings of the node labels. The default is [].
node_attrs : list, optional
The list of the name strings of the node attributes. The default is [].

Returns
-------
vk_dict : dict
Vertex kernels keyed by vertices.
Notes
-----
This function is used by ``gklearn.kernels.FixedPoint`` and
``gklearn.kernels.StructuralSP``. The method is borrowed from FCSP [1].

References
----------
.. [1] Lifan Xu, Wei Wang, M Alvarez, John Cavazos, and Dongping Zhang.
Parallelization of shortest path graph kernels on multi-core cpus and gpus.
Proceedings of the Programmability Issues for Heterogeneous Multicores
(MultiProg), Vienna, Austria, 2014.
"""
vk_dict = {}  # vertex kernels dict
if len(node_labels) > 0:
# node symb and non-symb labeled
if len(node_attrs) > 0:
kn = node_kernels['mix']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_labels = [n1[1][nl] for nl in node_labels]
n2_labels = [n2[1][nl] for nl in node_labels]
n1_attrs = [n1[1][na] for na in node_attrs]
n2_attrs = [n2[1][na] for na in node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
# node symb labeled
else:
kn = node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_labels = [n1[1][nl] for nl in node_labels]
n2_labels = [n2[1][nl] for nl in node_labels]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
else:
# node non-symb labeled
if len(node_attrs) > 0:
kn = node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_attrs = [n1[1][na] for na in node_attrs]
n2_attrs = [n2[1][na] for na in node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
# node unlabeled
else:
pass # @todo: add edge weights.
# for e1 in g1.edges(data=True):
# for e2 in g2.edges(data=True):
# if e1[2]['cost'] == e2[2]['cost']:
# kernel += 1
# return kernel

return vk_dict


def dummy_node():
"""
/*!
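The three entries of node_kernels decide which branch of compute_vertex_kernels above runs, depending on whether symbolic labels, non-symbolic attributes, or both are present. A usage sketch with toy kernel choices (the delta and Gaussian kernels below are illustrative assumptions, not mandated by the API):

# Sketch: vertex kernels between two tiny labeled graphs. The kernel
# functions are toy choices; any callables with these signatures work.
import networkx as nx
import numpy as np
from gklearn.utils.utils import compute_vertex_kernels  # path per the diff above

def k_symb(l1, l2):        # delta kernel on symbolic label lists
    return 1.0 if l1 == l2 else 0.0

def k_nsymb(a1, a2):       # Gaussian kernel on attribute lists
    d = np.array(a1, dtype=float) - np.array(a2, dtype=float)
    return np.exp(-np.dot(d, d) / 2.0)

def k_mix(l1, l2, a1, a2): # product of the two
    return k_symb(l1, l2) * k_nsymb(a1, a2)

node_kernels = {'symb': k_symb, 'nsymb': k_nsymb, 'mix': k_mix}

g1 = nx.Graph()
g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
g2 = nx.Graph()
g2.add_nodes_from([(0, {'atom': 'C'})])

vk = compute_vertex_kernels(g1, g2, node_kernels, node_labels=['atom'])
print(vk)    # {(0, 0): 1.0, (1, 0): 0.0}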


+ 1  - 1    setup.py

@@ -8,7 +8,7 @@ with open('requirements_pypi.txt') as fp:


setuptools.setup(
name="graphkit-learn",
-	version="0.2.0",
+	version="0.2.1",
author="Linlin Jia",
author_email="linlin.jia@insa-rouen.fr",
description="A Python library for graph kernels, graph edit distances, and graph pre-images",

