Browse Source

1. Fix a bug when computing kernels between non-symbolic node attributes in spkernel and structuralspkernel (the ssp kernel), and add a Gaussian kernel implementation.

2. Update the pygraph.utils.graphdataset.get_dataset_attributes function so that it can still read the dimension of the attributes when a dataset has missing attributes.
v0.1
jajupmochi 6 years ago
parent
commit
ef19fcba5a
10 changed files with 41112 additions and 39087 deletions
  1. +38582
    -38584
      notebooks/check_gm/Letter-med.gm.eps
  2. +329
    -317
      notebooks/get_dataset_attributes.ipynb
  3. +1922
    -1
      notebooks/plot_all_graphs.ipynb
  4. +12
    -13
      notebooks/run_spkernel.py
  5. +4
    -5
      notebooks/run_structuralspkernel.py
  6. +77
    -0
      notebooks/test.py
  7. +84
    -84
      pygraph/kernels/spKernel.py
  8. +11
    -14
      pygraph/kernels/structuralspKernel.py
  9. +54
    -65
      pygraph/utils/graphdataset.py
  10. +37
    -4
      pygraph/utils/kernels.py

+ 38582
- 38584
notebooks/check_gm/Letter-med.gm.eps
File diff suppressed because it is too large
View File


+ 329
- 317
notebooks/get_dataset_attributes.ipynb View File

@@ -12,21 +12,131 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"\n", "\n",
"Acyclic:\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n",
"edge_labeled : False\n",
"is_directed : False\n",
"dataset_size : 183\n",
"ave_node_num : 8.153005464480874\n",
"min_node_num : 3\n",
"max_node_num : 11\n",
"ave_edge_num : 7.1530054644808745\n",
"min_edge_num : 2\n",
"max_edge_num : 10\n",
"ave_node_degree : 2.80327868852459\n",
"min_node_degree : 2\n",
"max_node_degree : 4\n",
"node_label_num : 3\n",
"edge_label_num : 1\n",
"node_attr_dim : 0\n",
"edge_attr_dim : 0\n",
"class_number : 148\n",
"\n",
"\n",
"Alkane:\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n",
"edge_labeled : False\n",
"is_directed : False\n",
"dataset_size : 150\n",
"ave_node_num : 8.873333333333333\n",
"min_node_num : 1\n",
"max_node_num : 10\n",
"ave_edge_num : 7.873333333333333\n",
"min_edge_num : 0\n",
"max_edge_num : 9\n",
"ave_node_degree : 3.36\n",
"min_node_degree : 0\n",
"max_node_degree : 4\n",
"node_label_num : 2\n",
"edge_label_num : 1\n",
"node_attr_dim : 0\n",
"edge_attr_dim : 0\n",
"class_number : 123\n",
"\n",
"\n",
"MAO:\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n",
"edge_labeled : True\n",
"is_directed : False\n",
"dataset_size : 68\n",
"ave_node_num : 18.38235294117647\n",
"min_node_num : 11\n",
"max_node_num : 27\n",
"ave_edge_num : 19.63235294117647\n",
"min_edge_num : 12\n",
"max_edge_num : 29\n",
"ave_node_degree : 3.0\n",
"min_node_degree : 3\n",
"max_node_degree : 3\n",
"node_label_num : 3\n",
"edge_label_num : 4\n",
"node_attr_dim : 0\n",
"edge_attr_dim : 0\n",
"class_number : 2\n",
"\n",
"\n",
"PAH:\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : False\n",
"edge_labeled : False\n",
"is_directed : False\n",
"dataset_size : 94\n",
"ave_node_num : 20.70212765957447\n",
"min_node_num : 10\n",
"max_node_num : 28\n",
"ave_edge_num : 24.425531914893618\n",
"min_edge_num : 11\n",
"max_edge_num : 34\n",
"ave_node_degree : 3.0106382978723403\n",
"min_node_degree : 3\n",
"max_node_degree : 4\n",
"node_label_num : 1\n",
"edge_label_num : 1\n",
"node_attr_dim : 0\n",
"edge_attr_dim : 0\n",
"class_number : 2\n",
"\n",
"\n",
"MUTAG:\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n",
"edge_labeled : True\n",
"is_directed : False\n",
"dataset_size : 188\n",
"ave_node_num : 17.930851063829788\n",
"min_node_num : 10\n",
"max_node_num : 28\n",
"ave_edge_num : 19.79255319148936\n",
"min_edge_num : 10\n",
"max_edge_num : 33\n",
"ave_node_degree : 3.00531914893617\n",
"min_node_degree : 3\n",
"max_node_degree : 4\n",
"node_label_num : 7\n",
"edge_label_num : 11\n",
"node_attr_dim : 0\n",
"edge_attr_dim : 0\n",
"class_number : 2\n",
"\n",
"\n",
"Letter-med:\n", "Letter-med:\n",
"substructures : {'linear', 'non linear'}\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : False\n", "node_labeled : False\n",
"edge_labeled : False\n", "edge_labeled : False\n",
"is_directed : False\n", "is_directed : False\n",
"dataset_size : 2250\n", "dataset_size : 2250\n",
"ave_graph_size : 4.674666666666667\n",
"min_graph_size : 1\n",
"max_graph_size : 9\n",
"ave_graph_edge_num : 3.2057777777777776\n",
"min_graph_edge_num : 0\n",
"max_graph_edge_num : 7\n",
"ave_graph_degree : 2.012888888888889\n",
"min_graph_degree : 0\n",
"max_graph_degree : 4\n",
"ave_node_num : 4.674666666666667\n",
"min_node_num : 1\n",
"max_node_num : 9\n",
"ave_edge_num : 3.2057777777777776\n",
"min_edge_num : 0\n",
"max_edge_num : 7\n",
"ave_node_degree : 2.012888888888889\n",
"min_node_degree : 0\n",
"max_node_degree : 4\n",
"node_label_num : 0\n", "node_label_num : 0\n",
"edge_label_num : 0\n", "edge_label_num : 0\n",
"node_attr_dim : 2\n", "node_attr_dim : 2\n",
@@ -34,21 +144,43 @@
"class_number : 15\n", "class_number : 15\n",
"\n", "\n",
"\n", "\n",
"ENZYMES:\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n",
"edge_labeled : False\n",
"is_directed : False\n",
"dataset_size : 600\n",
"ave_node_num : 32.63333333333333\n",
"min_node_num : 2\n",
"max_node_num : 126\n",
"ave_edge_num : 62.13666666666666\n",
"min_edge_num : 1\n",
"max_edge_num : 149\n",
"ave_node_degree : 6.086666666666667\n",
"min_node_degree : 1\n",
"max_node_degree : 9\n",
"node_label_num : 3\n",
"edge_label_num : 0\n",
"node_attr_dim : 18\n",
"edge_attr_dim : 0\n",
"class_number : 6\n",
"\n",
"\n",
"Mutagenicity:\n", "Mutagenicity:\n",
"substructures : {'linear', 'non linear'}\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n", "node_labeled : True\n",
"edge_labeled : True\n", "edge_labeled : True\n",
"is_directed : False\n", "is_directed : False\n",
"dataset_size : 4337\n", "dataset_size : 4337\n",
"ave_graph_size : 30.317731150564907\n",
"min_graph_size : 4\n",
"max_graph_size : 417\n",
"ave_graph_edge_num : 30.76942587041734\n",
"min_graph_edge_num : 3\n",
"max_graph_edge_num : 112\n",
"ave_graph_degree : 3.75651371916071\n",
"min_graph_degree : 3\n",
"max_graph_degree : 4\n",
"ave_node_num : 30.317731150564907\n",
"min_node_num : 4\n",
"max_node_num : 417\n",
"ave_edge_num : 30.76942587041734\n",
"min_edge_num : 3\n",
"max_edge_num : 112\n",
"ave_node_degree : 3.75651371916071\n",
"min_node_degree : 3\n",
"max_node_degree : 4\n",
"node_label_num : 14\n", "node_label_num : 14\n",
"edge_label_num : 3\n", "edge_label_num : 3\n",
"node_attr_dim : 0\n", "node_attr_dim : 0\n",
@@ -56,21 +188,43 @@
"class_number : 2\n", "class_number : 2\n",
"\n", "\n",
"\n", "\n",
"D&D:\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n",
"edge_labeled : False\n",
"is_directed : False\n",
"dataset_size : 1178\n",
"ave_node_num : 284.3166383701188\n",
"min_node_num : 30\n",
"max_node_num : 5748\n",
"ave_edge_num : 715.6587436332767\n",
"min_edge_num : 63\n",
"max_edge_num : 14267\n",
"ave_node_degree : 9.509337860780985\n",
"min_node_degree : 6\n",
"max_node_degree : 19\n",
"node_label_num : 82\n",
"edge_label_num : 0\n",
"node_attr_dim : 0\n",
"edge_attr_dim : 0\n",
"class_number : 2\n",
"\n",
"\n",
"AIDS:\n", "AIDS:\n",
"substructures : {'linear', 'non linear'}\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n", "node_labeled : True\n",
"edge_labeled : True\n", "edge_labeled : True\n",
"is_directed : False\n", "is_directed : False\n",
"dataset_size : 2000\n", "dataset_size : 2000\n",
"ave_graph_size : 15.6925\n",
"min_graph_size : 2\n",
"max_graph_size : 95\n",
"ave_graph_edge_num : 16.195\n",
"min_graph_edge_num : 1\n",
"max_graph_edge_num : 103\n",
"ave_graph_degree : 3.322\n",
"min_graph_degree : 1\n",
"max_graph_degree : 6\n",
"ave_node_num : 15.6925\n",
"min_node_num : 2\n",
"max_node_num : 95\n",
"ave_edge_num : 16.195\n",
"min_edge_num : 1\n",
"max_edge_num : 103\n",
"ave_node_degree : 3.322\n",
"min_node_degree : 1\n",
"max_node_degree : 6\n",
"node_label_num : 38\n", "node_label_num : 38\n",
"edge_label_num : 3\n", "edge_label_num : 3\n",
"node_attr_dim : 4\n", "node_attr_dim : 4\n",
@@ -84,15 +238,15 @@
"edge_labeled : False\n", "edge_labeled : False\n",
"is_directed : False\n", "is_directed : False\n",
"dataset_size : 41\n", "dataset_size : 41\n",
"ave_graph_size : 1377.2682926829268\n",
"min_graph_size : 134\n",
"max_graph_size : 5037\n",
"ave_graph_edge_num : 3074.0975609756097\n",
"min_graph_edge_num : 320\n",
"max_graph_edge_num : 10888\n",
"ave_graph_degree : 7.853658536585366\n",
"min_graph_degree : 6\n",
"max_graph_degree : 10\n",
"ave_node_num : 1377.2682926829268\n",
"min_node_num : 134\n",
"max_node_num : 5037\n",
"ave_edge_num : 3074.0975609756097\n",
"min_edge_num : 320\n",
"max_edge_num : 10888\n",
"ave_node_degree : 7.853658536585366\n",
"min_node_degree : 6\n",
"max_node_degree : 10\n",
"node_label_num : 5\n", "node_label_num : 5\n",
"edge_label_num : 0\n", "edge_label_num : 0\n",
"node_attr_dim : 1\n", "node_attr_dim : 1\n",
@@ -101,20 +255,20 @@
"\n", "\n",
"\n", "\n",
"MSRC9:\n", "MSRC9:\n",
"substructures : {'linear', 'non linear'}\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n", "node_labeled : True\n",
"edge_labeled : False\n", "edge_labeled : False\n",
"is_directed : False\n", "is_directed : False\n",
"dataset_size : 221\n", "dataset_size : 221\n",
"ave_graph_size : 40.57918552036199\n",
"min_graph_size : 25\n",
"max_graph_size : 55\n",
"ave_graph_edge_num : 97.9366515837104\n",
"min_graph_edge_num : 53\n",
"max_graph_edge_num : 145\n",
"ave_graph_degree : 10.158371040723981\n",
"min_graph_degree : 8\n",
"max_graph_degree : 16\n",
"ave_node_num : 40.57918552036199\n",
"min_node_num : 25\n",
"max_node_num : 55\n",
"ave_edge_num : 97.9366515837104\n",
"min_edge_num : 53\n",
"max_edge_num : 145\n",
"ave_node_degree : 10.158371040723981\n",
"min_node_degree : 8\n",
"max_node_degree : 16\n",
"node_label_num : 10\n", "node_label_num : 10\n",
"edge_label_num : 0\n", "edge_label_num : 0\n",
"node_attr_dim : 0\n", "node_attr_dim : 0\n",
@@ -123,20 +277,20 @@
"\n", "\n",
"\n", "\n",
"MSRC21:\n", "MSRC21:\n",
"substructures : {'linear', 'non linear'}\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n", "node_labeled : True\n",
"edge_labeled : False\n", "edge_labeled : False\n",
"is_directed : False\n", "is_directed : False\n",
"dataset_size : 563\n", "dataset_size : 563\n",
"ave_graph_size : 77.52042628774423\n",
"min_graph_size : 51\n",
"max_graph_size : 141\n",
"ave_graph_edge_num : 198.32326820603907\n",
"min_graph_edge_num : 121\n",
"max_graph_edge_num : 405\n",
"ave_graph_degree : 11.41563055062167\n",
"min_graph_degree : 8\n",
"max_graph_degree : 23\n",
"ave_node_num : 77.52042628774423\n",
"min_node_num : 51\n",
"max_node_num : 141\n",
"ave_edge_num : 198.32326820603907\n",
"min_edge_num : 121\n",
"max_edge_num : 405\n",
"ave_node_degree : 11.41563055062167\n",
"min_node_degree : 8\n",
"max_node_degree : 23\n",
"node_label_num : 22\n", "node_label_num : 22\n",
"edge_label_num : 0\n", "edge_label_num : 0\n",
"node_attr_dim : 0\n", "node_attr_dim : 0\n",
@@ -145,20 +299,20 @@
"\n", "\n",
"\n", "\n",
"SYNTHETIC:\n", "SYNTHETIC:\n",
"substructures : {'linear', 'non linear'}\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n", "node_labeled : True\n",
"edge_labeled : False\n", "edge_labeled : False\n",
"is_directed : False\n", "is_directed : False\n",
"dataset_size : 300\n", "dataset_size : 300\n",
"ave_graph_size : 100.0\n",
"min_graph_size : 100\n",
"max_graph_size : 100\n",
"ave_graph_edge_num : 196.0\n",
"min_graph_edge_num : 196\n",
"max_graph_edge_num : 196\n",
"ave_graph_degree : 8.0\n",
"min_graph_degree : 8\n",
"max_graph_degree : 8\n",
"ave_node_num : 100.0\n",
"min_node_num : 100\n",
"max_node_num : 100\n",
"ave_edge_num : 196.0\n",
"min_edge_num : 196\n",
"max_edge_num : 196\n",
"ave_node_degree : 8.0\n",
"min_node_degree : 8\n",
"max_node_degree : 8\n",
"node_label_num : 8\n", "node_label_num : 8\n",
"edge_label_num : 0\n", "edge_label_num : 0\n",
"node_attr_dim : 1\n", "node_attr_dim : 1\n",
@@ -167,20 +321,20 @@
"\n", "\n",
"\n", "\n",
"BZR:\n", "BZR:\n",
"substructures : {'linear', 'non linear'}\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n", "node_labeled : True\n",
"edge_labeled : False\n", "edge_labeled : False\n",
"is_directed : False\n", "is_directed : False\n",
"dataset_size : 405\n", "dataset_size : 405\n",
"ave_graph_size : 35.75061728395062\n",
"min_graph_size : 13\n",
"max_graph_size : 57\n",
"ave_graph_edge_num : 38.358024691358025\n",
"min_graph_edge_num : 13\n",
"max_graph_edge_num : 60\n",
"ave_graph_degree : 3.8641975308641974\n",
"min_graph_degree : 3\n",
"max_graph_degree : 4\n",
"ave_node_num : 35.75061728395062\n",
"min_node_num : 13\n",
"max_node_num : 57\n",
"ave_edge_num : 38.358024691358025\n",
"min_edge_num : 13\n",
"max_edge_num : 60\n",
"ave_node_degree : 3.8641975308641974\n",
"min_node_degree : 3\n",
"max_node_degree : 4\n",
"node_label_num : 10\n", "node_label_num : 10\n",
"edge_label_num : 0\n", "edge_label_num : 0\n",
"node_attr_dim : 3\n", "node_attr_dim : 3\n",
@@ -189,20 +343,20 @@
"\n", "\n",
"\n", "\n",
"COX2:\n", "COX2:\n",
"substructures : {'linear', 'non linear'}\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n", "node_labeled : True\n",
"edge_labeled : False\n", "edge_labeled : False\n",
"is_directed : False\n", "is_directed : False\n",
"dataset_size : 467\n", "dataset_size : 467\n",
"ave_graph_size : 41.224839400428266\n",
"min_graph_size : 32\n",
"max_graph_size : 56\n",
"ave_graph_edge_num : 43.44539614561028\n",
"min_graph_edge_num : 34\n",
"max_graph_edge_num : 59\n",
"ave_graph_degree : 4.0\n",
"min_graph_degree : 4\n",
"max_graph_degree : 4\n",
"ave_node_num : 41.224839400428266\n",
"min_node_num : 32\n",
"max_node_num : 56\n",
"ave_edge_num : 43.44539614561028\n",
"min_edge_num : 34\n",
"max_edge_num : 59\n",
"ave_node_degree : 4.0\n",
"min_node_degree : 4\n",
"max_node_degree : 4\n",
"node_label_num : 8\n", "node_label_num : 8\n",
"edge_label_num : 0\n", "edge_label_num : 0\n",
"node_attr_dim : 3\n", "node_attr_dim : 3\n",
@@ -211,20 +365,20 @@
"\n", "\n",
"\n", "\n",
"DHFR:\n", "DHFR:\n",
"substructures : {'linear', 'non linear'}\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n", "node_labeled : True\n",
"edge_labeled : False\n", "edge_labeled : False\n",
"is_directed : False\n", "is_directed : False\n",
"dataset_size : 756\n", "dataset_size : 756\n",
"ave_graph_size : 42.42724867724868\n",
"min_graph_size : 20\n",
"max_graph_size : 71\n",
"ave_graph_edge_num : 44.544973544973544\n",
"min_graph_edge_num : 21\n",
"max_graph_edge_num : 73\n",
"ave_graph_degree : 3.955026455026455\n",
"min_graph_degree : 3\n",
"max_graph_degree : 4\n",
"ave_node_num : 42.42724867724868\n",
"min_node_num : 20\n",
"max_node_num : 71\n",
"ave_edge_num : 44.544973544973544\n",
"min_edge_num : 21\n",
"max_edge_num : 73\n",
"ave_node_degree : 3.955026455026455\n",
"min_node_degree : 3\n",
"max_node_degree : 4\n",
"node_label_num : 9\n", "node_label_num : 9\n",
"edge_label_num : 0\n", "edge_label_num : 0\n",
"node_attr_dim : 3\n", "node_attr_dim : 3\n",
@@ -232,43 +386,21 @@
"class_number : 2\n", "class_number : 2\n",
"\n", "\n",
"\n", "\n",
"ENZYMES:\n",
"substructures : {'linear', 'non linear'}\n",
"node_labeled : True\n",
"edge_labeled : False\n",
"is_directed : False\n",
"dataset_size : 600\n",
"ave_graph_size : 32.63333333333333\n",
"min_graph_size : 2\n",
"max_graph_size : 126\n",
"ave_graph_edge_num : 62.13666666666666\n",
"min_graph_edge_num : 1\n",
"max_graph_edge_num : 149\n",
"ave_graph_degree : 6.086666666666667\n",
"min_graph_degree : 1\n",
"max_graph_degree : 9\n",
"node_label_num : 3\n",
"edge_label_num : 0\n",
"node_attr_dim : 18\n",
"edge_attr_dim : 0\n",
"class_number : 6\n",
"\n",
"\n",
"PROTEINS:\n", "PROTEINS:\n",
"substructures : {'linear', 'non linear'}\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n", "node_labeled : True\n",
"edge_labeled : False\n", "edge_labeled : False\n",
"is_directed : False\n", "is_directed : False\n",
"dataset_size : 1113\n", "dataset_size : 1113\n",
"ave_graph_size : 39.05750224618149\n",
"min_graph_size : 4\n",
"max_graph_size : 620\n",
"ave_graph_edge_num : 72.8158131176999\n",
"min_graph_edge_num : 5\n",
"max_graph_edge_num : 1049\n",
"ave_graph_degree : 5.794249775381851\n",
"min_graph_degree : 3\n",
"max_graph_degree : 25\n",
"ave_node_num : 39.05750224618149\n",
"min_node_num : 4\n",
"max_node_num : 620\n",
"ave_edge_num : 72.8158131176999\n",
"min_edge_num : 5\n",
"max_edge_num : 1049\n",
"ave_node_degree : 5.794249775381851\n",
"min_node_degree : 3\n",
"max_node_degree : 25\n",
"node_label_num : 3\n", "node_label_num : 3\n",
"edge_label_num : 0\n", "edge_label_num : 0\n",
"node_attr_dim : 1\n", "node_attr_dim : 1\n",
@@ -277,135 +409,25 @@
"\n", "\n",
"\n", "\n",
"PROTEINS_full:\n", "PROTEINS_full:\n",
"substructures : {'linear', 'non linear'}\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n", "node_labeled : True\n",
"edge_labeled : False\n", "edge_labeled : False\n",
"is_directed : False\n", "is_directed : False\n",
"dataset_size : 1113\n", "dataset_size : 1113\n",
"ave_graph_size : 39.05750224618149\n",
"min_graph_size : 4\n",
"max_graph_size : 620\n",
"ave_graph_edge_num : 72.8158131176999\n",
"min_graph_edge_num : 5\n",
"max_graph_edge_num : 1049\n",
"ave_graph_degree : 5.794249775381851\n",
"min_graph_degree : 3\n",
"max_graph_degree : 25\n",
"ave_node_num : 39.05750224618149\n",
"min_node_num : 4\n",
"max_node_num : 620\n",
"ave_edge_num : 72.8158131176999\n",
"min_edge_num : 5\n",
"max_edge_num : 1049\n",
"ave_node_degree : 5.794249775381851\n",
"min_node_degree : 3\n",
"max_node_degree : 25\n",
"node_label_num : 3\n", "node_label_num : 3\n",
"edge_label_num : 0\n", "edge_label_num : 0\n",
"node_attr_dim : 29\n", "node_attr_dim : 29\n",
"edge_attr_dim : 0\n", "edge_attr_dim : 0\n",
"class_number : 2\n", "class_number : 2\n",
"\n",
"\n",
"D&D:\n",
"substructures : {'linear', 'non linear'}\n",
"node_labeled : True\n",
"edge_labeled : False\n",
"is_directed : False\n",
"dataset_size : 1178\n",
"ave_graph_size : 284.3166383701188\n",
"min_graph_size : 30\n",
"max_graph_size : 5748\n",
"ave_graph_edge_num : 715.6587436332767\n",
"min_graph_edge_num : 63\n",
"max_graph_edge_num : 14267\n",
"ave_graph_degree : 9.509337860780985\n",
"min_graph_degree : 6\n",
"max_graph_degree : 19\n",
"node_label_num : 82\n",
"edge_label_num : 0\n",
"node_attr_dim : 0\n",
"edge_attr_dim : 0\n",
"class_number : 2\n",
"\n",
"\n",
"MUTAG:\n",
"substructures : {'linear', 'non linear'}\n",
"node_labeled : True\n",
"edge_labeled : True\n",
"is_directed : False\n",
"dataset_size : 188\n",
"ave_graph_size : 17.930851063829788\n",
"min_graph_size : 10\n",
"max_graph_size : 28\n",
"ave_graph_edge_num : 19.79255319148936\n",
"min_graph_edge_num : 10\n",
"max_graph_edge_num : 33\n",
"ave_graph_degree : 3.00531914893617\n",
"min_graph_degree : 3\n",
"max_graph_degree : 4\n",
"node_label_num : 7\n",
"edge_label_num : 11\n",
"node_attr_dim : 0\n",
"edge_attr_dim : 0\n",
"class_number : 2\n",
"\n",
"\n",
"Alkane:\n",
"substructures : {'linear', 'non linear'}\n",
"node_labeled : True\n",
"edge_labeled : False\n",
"is_directed : False\n",
"dataset_size : 150\n",
"ave_graph_size : 8.873333333333333\n",
"min_graph_size : 1\n",
"max_graph_size : 10\n",
"ave_graph_edge_num : 7.873333333333333\n",
"min_graph_edge_num : 0\n",
"max_graph_edge_num : 9\n",
"ave_graph_degree : 3.36\n",
"min_graph_degree : 0\n",
"max_graph_degree : 4\n",
"node_label_num : 2\n",
"edge_label_num : 1\n",
"node_attr_dim : 0\n",
"edge_attr_dim : 0\n",
"class_number : 123\n",
"\n",
"\n",
"Acyclic:\n",
"substructures : {'linear', 'non linear'}\n",
"node_labeled : True\n",
"edge_labeled : False\n",
"is_directed : False\n",
"dataset_size : 183\n",
"ave_graph_size : 8.153005464480874\n",
"min_graph_size : 3\n",
"max_graph_size : 11\n",
"ave_graph_edge_num : 7.1530054644808745\n",
"min_graph_edge_num : 2\n",
"max_graph_edge_num : 10\n",
"ave_graph_degree : 2.80327868852459\n",
"min_graph_degree : 2\n",
"max_graph_degree : 4\n",
"node_label_num : 3\n",
"edge_label_num : 1\n",
"node_attr_dim : 0\n",
"edge_attr_dim : 0\n",
"class_number : 148\n",
"\n",
"\n",
"MAO:\n",
"substructures : {'linear', 'non linear'}\n",
"node_labeled : True\n",
"edge_labeled : True\n",
"is_directed : False\n",
"dataset_size : 68\n",
"ave_graph_size : 18.38235294117647\n",
"min_graph_size : 11\n",
"max_graph_size : 27\n",
"ave_graph_edge_num : 19.63235294117647\n",
"min_graph_edge_num : 12\n",
"max_graph_edge_num : 29\n",
"ave_graph_degree : 3.0\n",
"min_graph_degree : 3\n",
"max_graph_degree : 3\n",
"node_label_num : 3\n",
"edge_label_num : 4\n",
"node_attr_dim : 0\n",
"edge_attr_dim : 0\n",
"class_number : 2\n",
"\n" "\n"
] ]
}, },
@@ -414,43 +436,21 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"\n", "\n",
"PAH:\n",
"substructures : {'linear', 'non linear'}\n",
"node_labeled : False\n",
"edge_labeled : False\n",
"is_directed : False\n",
"dataset_size : 94\n",
"ave_graph_size : 20.70212765957447\n",
"min_graph_size : 10\n",
"max_graph_size : 28\n",
"ave_graph_edge_num : 24.425531914893618\n",
"min_graph_edge_num : 11\n",
"max_graph_edge_num : 34\n",
"ave_graph_degree : 3.0106382978723403\n",
"min_graph_degree : 3\n",
"max_graph_degree : 4\n",
"node_label_num : 1\n",
"edge_label_num : 1\n",
"node_attr_dim : 0\n",
"edge_attr_dim : 0\n",
"class_number : 2\n",
"\n",
"\n",
"NCI1:\n", "NCI1:\n",
"substructures : {'linear', 'non linear'}\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n", "node_labeled : True\n",
"edge_labeled : False\n", "edge_labeled : False\n",
"is_directed : False\n", "is_directed : False\n",
"dataset_size : 4110\n", "dataset_size : 4110\n",
"ave_graph_size : 29.8654501216545\n",
"min_graph_size : 3\n",
"max_graph_size : 111\n",
"ave_graph_edge_num : 32.3\n",
"min_graph_edge_num : 2\n",
"max_graph_edge_num : 119\n",
"ave_graph_degree : 3.3360097323600972\n",
"min_graph_degree : 2\n",
"max_graph_degree : 4\n",
"ave_node_num : 29.8654501216545\n",
"min_node_num : 3\n",
"max_node_num : 111\n",
"ave_edge_num : 32.3\n",
"min_edge_num : 2\n",
"max_edge_num : 119\n",
"ave_node_degree : 3.3360097323600972\n",
"min_node_degree : 2\n",
"max_node_degree : 4\n",
"node_label_num : 37\n", "node_label_num : 37\n",
"edge_label_num : 0\n", "edge_label_num : 0\n",
"node_attr_dim : 0\n", "node_attr_dim : 0\n",
@@ -459,39 +459,51 @@
"\n", "\n",
"\n", "\n",
"NCI109:\n", "NCI109:\n",
"substructures : {'linear', 'non linear'}\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n", "node_labeled : True\n",
"edge_labeled : False\n", "edge_labeled : False\n",
"is_directed : False\n", "is_directed : False\n",
"dataset_size : 4127\n", "dataset_size : 4127\n",
"ave_graph_size : 29.681124303368065\n",
"min_graph_size : 4\n",
"max_graph_size : 111\n",
"ave_graph_edge_num : 32.13084565059365\n",
"min_graph_edge_num : 3\n",
"max_graph_edge_num : 119\n",
"ave_graph_degree : 3.343833292948873\n",
"min_graph_degree : 2\n",
"max_graph_degree : 5\n",
"ave_node_num : 29.681124303368065\n",
"min_node_num : 4\n",
"max_node_num : 111\n",
"ave_edge_num : 32.13084565059365\n",
"min_edge_num : 3\n",
"max_edge_num : 119\n",
"ave_node_degree : 3.343833292948873\n",
"min_node_degree : 2\n",
"max_node_degree : 5\n",
"node_label_num : 38\n", "node_label_num : 38\n",
"edge_label_num : 0\n", "edge_label_num : 0\n",
"node_attr_dim : 0\n", "node_attr_dim : 0\n",
"edge_attr_dim : 0\n", "edge_attr_dim : 0\n",
"class_number : 2\n", "class_number : 2\n",
"\n",
"load SDF: 100%|██████████| 4457424/4457424 [00:10<00:00, 430440.94it/s]\n",
"ajust data: 100%|██████████| 42687/42687 [00:09<00:00, 4352.25it/s] \n",
"\n",
"NCI-HIV:\n",
"substructures : {'non linear', 'linear'}\n",
"node_labeled : True\n",
"edge_labeled : True\n",
"is_directed : False\n",
"dataset_size : 42682\n",
"ave_node_num : 45.70945597675835\n",
"min_node_num : 2\n",
"max_node_num : 438\n",
"ave_edge_num : 47.7137903565906\n",
"min_edge_num : 1\n",
"max_edge_num : 441\n",
"ave_node_degree : 3.9760554800618526\n",
"min_node_degree : 1\n",
"max_node_degree : 12\n",
"node_label_num : 63\n",
"edge_label_num : 3\n",
"node_attr_dim : 0\n",
"edge_attr_dim : 0\n",
"class_number : 3\n",
"\n" "\n"
] ]
},
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'tqdm'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-1-1e4da065c026>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'dataset'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0mfilename_y\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'dataset_y'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'dataset_y'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mds\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 47\u001b[0;31m extra_params=(ds['extra_params'] if 'extra_params' in ds else None))\n\u001b[0m\u001b[1;32m 48\u001b[0m attrs = get_dataset_attributes(\n\u001b[1;32m 49\u001b[0m dataset, target=y, node_label='atom', edge_label='bond_type')\n",
"\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/utils/graphfiles.py\u001b[0m in \u001b[0;36mloadDataset\u001b[0;34m(filename, filename_y, extra_params)\u001b[0m\n\u001b[1;32m 377\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mextension\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"sdf\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 378\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 379\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mtqdm\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 380\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 381\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'tqdm'"
]
} }
], ],
"source": [ "source": [
@@ -501,8 +513,18 @@
"from pygraph.utils.graphdataset import get_dataset_attributes\n", "from pygraph.utils.graphdataset import get_dataset_attributes\n",
"\n", "\n",
"dslist = [\n", "dslist = [\n",
" {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',},\n",
" {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds',\n",
" 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt',},\n",
" {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds',},\n",
" {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds',},\n",
" {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n",
" 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},\n",
" {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", " {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n",
" {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n",
" {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", " {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n",
" {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n",
" 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},\n",
" {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},\n", " {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},\n",
" {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},\n", " {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},\n",
" {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},\n", " {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},\n",
@@ -510,19 +532,9 @@
" {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},\n", " {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},\n",
" {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},\n", " {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},\n",
" {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},\n", " {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},\n",
" {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'},\n",
" {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n",
" {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, \n",
" {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},\n", " {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},\n",
" {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'},\n",
" {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n",
" 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},\n",
" {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n",
" 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},\n",
" {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds',\n",
" 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt',},\n",
" {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',},\n",
" {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds',},\n",
" {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds',},\n",
" {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, \n",
" {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n", " {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n",
" 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},\n", " 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},\n",
" {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n", " {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n",


+ 1922
- 1
notebooks/plot_all_graphs.ipynb
File diff suppressed because it is too large
View File


+ 12
- 13
notebooks/run_spkernel.py View File

@@ -1,22 +1,21 @@
import functools import functools
from libs import * from libs import *
import multiprocessing import multiprocessing
from sklearn.metrics.pairwise import rbf_kernel


from pygraph.kernels.spKernel import spkernel from pygraph.kernels.spKernel import spkernel
from pygraph.utils.kernels import deltakernel, kernelproduct
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
#from pygraph.utils.model_selection_precomputed import trial_do #from pygraph.utils.model_selection_precomputed import trial_do


dslist = [ dslist = [
{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
'task': 'regression'}, # node symb
{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
# contains single node graph, node symb
{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
# 'task': 'regression'}, # node symb
# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
# # contains single node graph, node symb
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# node nsymb # node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
@@ -54,9 +53,9 @@ dslist = [
# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',}, # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
] ]
estimator = spkernel estimator = spkernel
mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'node_kernels': [ param_grid_precomputed = {'node_kernels': [
{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}]}
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}
param_grid = [{'C': np.logspace(-10, 3, num=27, base=10)}, param_grid = [{'C': np.logspace(-10, 3, num=27, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}] {'alpha': np.logspace(-10, 10, num=41, base=10)}]




+ 4
- 5
notebooks/run_structuralspkernel.py View File

@@ -9,10 +9,9 @@ Created on Fri Sep 28 16:37:29 2018
import functools import functools
from libs import * from libs import *
import multiprocessing import multiprocessing
from sklearn.metrics.pairwise import rbf_kernel


from pygraph.kernels.structuralspKernel import structuralspkernel from pygraph.kernels.structuralspKernel import structuralspkernel
from pygraph.utils.kernels import deltakernel, kernelproduct
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct


dslist = [ dslist = [
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', # {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
@@ -61,11 +60,11 @@ dslist = [
# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',}, # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
] ]
estimator = structuralspkernel estimator = structuralspkernel
mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'node_kernels': param_grid_precomputed = {'node_kernels':
[{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}],
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels': 'edge_kernels':
[{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}]}
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)}, param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}] {'alpha': np.logspace(-10, 10, num=41, base=10)}]




+ 77
- 0
notebooks/test.py View File

@@ -0,0 +1,77 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 28 16:37:29 2018

Run model selection with the structural shortest-path kernel on every
dataset enabled in ``dslist``.

@author: ljia
"""

import functools
# NOTE(review): ``np`` and ``model_selection_for_precomputed_kernel`` are not
# imported explicitly; presumably they come from this star import -- confirm.
from libs import *
import multiprocessing

from pygraph.kernels.structuralspKernel import structuralspkernel
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct

# Datasets to evaluate. Uncomment an entry to include it in the run.
dslist = [
    # {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
    #  'task': 'regression'},  # node symb
    # {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
    #  'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },  # contains single node graph, node symb
    {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', },  # node/edge symb

    # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'},  # edge symb, node nsymb
    # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},  # node symb/nsymb
    # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},  # node symb/nsymb
    # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
    #
    # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},  # node symb
    # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},  # node symb
    # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},  # node symb/nsymb ,edge nsymb

    # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},  # node symb/nsymb, edge symb
    # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
    #  'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',},  # node/edge symb

    # # not working below
    # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
    # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
    # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
    # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]
estimator = structuralspkernel
# structuralspkernel hands each node/edge kernel one bare attribute vector per
# argument, so the non-symbolic kernel must be pygraph's gaussiankernel;
# sklearn's rbf_kernel expects 2-D sample matrices and rejects 1-D vectors.
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'node_kernels':
                          [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
                          'edge_kernels':
                          [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}
# param_grid[0] is used for classification, param_grid[1] for regression.
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
              {'alpha': np.logspace(-10, 10, num=41, base=10)}]

for ds in dslist:
    print()
    print(ds['name'])
    task = ds.get('task', 'classification')
    model_selection_for_precomputed_kernel(
        ds['dataset'],
        estimator,
        param_grid_precomputed,
        param_grid[1] if task == 'regression' else param_grid[0],
        task,
        NUM_TRIALS=30,
        datafile_y=ds.get('dataset_y'),
        extra_params=ds.get('extra_params'),
        ds_name=ds['name'],
        n_jobs=multiprocessing.cpu_count(),
        read_gm_from_file=False)
    print()

+ 84
- 84
pygraph/kernels/spKernel.py View File

@@ -53,7 +53,6 @@ def spkernel(*args,
""" """
# pre-process # pre-process
Gn = args[0] if len(args) == 1 else [args[0], args[1]] Gn = args[0] if len(args) == 1 else [args[0], args[1]]

weight = None weight = None
if edge_weight is None: if edge_weight is None:
print('\n None edge weight specified. Set all weight to 1.\n') print('\n None edge weight specified. Set all weight to 1.\n')
@@ -76,7 +75,8 @@ def spkernel(*args,
attr_names=['node_labeled', 'node_attr_dim', 'is_directed'], attr_names=['node_labeled', 'node_attr_dim', 'is_directed'],
node_label=node_label) node_label=node_label)


# remove graphs with no edges, as no sp can be found in their structures, so the kernel between such a graph and itself will be zero.
# remove graphs with no edges, as no sp can be found in their structures,
# so the kernel between such a graph and itself will be zero.
len_gn = len(Gn) len_gn = len(Gn)
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0] Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
idx = [G[0] for G in Gn] idx = [G[0] for G in Gn]
@@ -208,93 +208,93 @@ def spkernel_do(Gn, ds_attrs, node_label, node_kernels, ij):
g2 = Gn[j] g2 = Gn[j]
kernel = 0 kernel = 0


try:
# compute shortest path matrices first, method borrowed from FCSP.
if ds_attrs['node_labeled']:
# node symb and non-synb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['mix']
vk_dict = {} # shortest path matrices dict
for n1, n2 in product(
g1.nodes(data=True), g2.nodes(data=True)):
vk_dict[(n1[0], n2[0])] = kn(
n1[1][node_label], n2[1][node_label],
[n1[1]['attributes']], [n2[1]['attributes']])
# node symb labeled
else:
kn = node_kernels['symb']
vk_dict = {} # shortest path matrices dict
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
n2[1][node_label])
# try:
# compute shortest path matrices first, method borrowed from FCSP.
if ds_attrs['node_labeled']:
# node symb and non-synb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['mix']
vk_dict = {} # shortest path matrices dict
for n1, n2 in product(
g1.nodes(data=True), g2.nodes(data=True)):
vk_dict[(n1[0], n2[0])] = kn(
n1[1][node_label], n2[1][node_label],
n1[1]['attributes'], n2[1]['attributes'])
# node symb labeled
else: else:
# node non-synb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['nsymb']
vk_dict = {} # shortest path matrices dict
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn([n1[1]['attributes']],
[n2[1]['attributes']])
# node unlabeled
else:
for e1, e2 in product(
g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kernel += 1
return i, j, kernel

# compute graph kernels
if ds_attrs['is_directed']:
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1],
e2[1])]
kn1 = nk11 * nk22
kernel += kn1
kn = node_kernels['symb']
vk_dict = {} # shortest path matrices dict
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
n2[1][node_label])
else:
# node non-synb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['nsymb']
vk_dict = {} # shortest path matrices dict
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'],
n2[1]['attributes'])
# node unlabeled
else: else:
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
for e1, e2 in product(
g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']: if e1[2]['cost'] == e2[2]['cost']:
# each edge walk is counted twice, starting from both its extreme nodes.
nk11, nk12, nk21, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(
e1[0], e2[1])], vk_dict[(e1[1],
e2[0])], vk_dict[(e1[1],
e2[1])]
kn1 = nk11 * nk22
kn2 = nk12 * nk21
kernel += kn1 + kn2
kernel += 1
return i, j, kernel

# compute graph kernels
if ds_attrs['is_directed']:
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1],
e2[1])]
kn1 = nk11 * nk22
kernel += kn1
else:
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
# each edge walk is counted twice, starting from both its extreme nodes.
nk11, nk12, nk21, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(
e1[0], e2[1])], vk_dict[(e1[1],
e2[0])], vk_dict[(e1[1],
e2[1])]
kn1 = nk11 * nk22
kn2 = nk12 * nk21
kernel += kn1 + kn2


# # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation
# # compute vertex kernels
# try:
# vk_mat = np.zeros((nx.number_of_nodes(g1),
# nx.number_of_nodes(g2)))
# g1nl = enumerate(g1.nodes(data=True))
# g2nl = enumerate(g2.nodes(data=True))
# for i1, n1 in g1nl:
# for i2, n2 in g2nl:
# vk_mat[i1][i2] = kn(
# n1[1][node_label], n2[1][node_label],
# [n1[1]['attributes']], [n2[1]['attributes']])
# # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation
# # compute vertex kernels
# try:
# vk_mat = np.zeros((nx.number_of_nodes(g1),
# nx.number_of_nodes(g2)))
# g1nl = enumerate(g1.nodes(data=True))
# g2nl = enumerate(g2.nodes(data=True))
# for i1, n1 in g1nl:
# for i2, n2 in g2nl:
# vk_mat[i1][i2] = kn(
# n1[1][node_label], n2[1][node_label],
# [n1[1]['attributes']], [n2[1]['attributes']])


# range1 = range(0, len(edge_w_g[i]))
# range2 = range(0, len(edge_w_g[j]))
# for i1 in range1:
# x1 = edge_x_g[i][i1]
# y1 = edge_y_g[i][i1]
# w1 = edge_w_g[i][i1]
# for i2 in range2:
# x2 = edge_x_g[j][i2]
# y2 = edge_y_g[j][i2]
# w2 = edge_w_g[j][i2]
# ke = (w1 == w2)
# if ke > 0:
# kn1 = vk_mat[x1][x2] * vk_mat[y1][y2]
# kn2 = vk_mat[x1][y2] * vk_mat[y1][x2]
# kernel += kn1 + kn2
except KeyError: # missing labels or attributes
pass
# range1 = range(0, len(edge_w_g[i]))
# range2 = range(0, len(edge_w_g[j]))
# for i1 in range1:
# x1 = edge_x_g[i][i1]
# y1 = edge_y_g[i][i1]
# w1 = edge_w_g[i][i1]
# for i2 in range2:
# x2 = edge_x_g[j][i2]
# y2 = edge_y_g[j][i2]
# w2 = edge_w_g[j][i2]
# ke = (w1 == w2)
# if ke > 0:
# kn1 = vk_mat[x1][x2] * vk_mat[y1][y2]
# kn2 = vk_mat[x1][y2] * vk_mat[y1][x2]
# kernel += kn1 + kn2
# except KeyError: # missing labels or attributes
# pass


return i, j, kernel return i, j, kernel




+ 11
- 14
pygraph/kernels/structuralspKernel.py View File

@@ -194,13 +194,12 @@ def structuralspkernel(*args,
# # ---- direct running, normally use single CPU core. ---- # # ---- direct running, normally use single CPU core. ----
# itr = combinations_with_replacement(range(0, len(Gn)), 2) # itr = combinations_with_replacement(range(0, len(Gn)), 2)
# for gs in tqdm(itr, desc='calculating kernels', file=sys.stdout): # for gs in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# if gs[0] == 24 and gs[1] == 411:
# i, j, kernel = structuralspkernel_do(Gn, splist, ds_attrs,
# node_label, edge_label, node_kernels, edge_kernels, gs)
# if(kernel > 1):
# print("error here ")
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
# i, j, kernel = structuralspkernel_do(Gn, splist, ds_attrs,
# node_label, edge_label, node_kernels, edge_kernels, gs)
# if(kernel > 1):
# print("error here ")
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel


run_time = time.time() - start_time run_time = time.time() - start_time
print( print(
@@ -232,7 +231,7 @@ def structuralspkernel_do(Gn, splist, ds_attrs, node_label, edge_label,
g1.nodes(data=True), g2.nodes(data=True)): g1.nodes(data=True), g2.nodes(data=True)):
vk_dict[(n1[0], n2[0])] = kn( vk_dict[(n1[0], n2[0])] = kn(
n1[1][node_label], n2[1][node_label], n1[1][node_label], n2[1][node_label],
[n1[1]['attributes']], [n2[1]['attributes']])
n1[1]['attributes'], n2[1]['attributes'])
# node symb labeled # node symb labeled
else: else:
kn = node_kernels['symb'] kn = node_kernels['symb']
@@ -248,8 +247,8 @@ def structuralspkernel_do(Gn, splist, ds_attrs, node_label, edge_label,
vk_dict = {} # shortest path matrices dict vk_dict = {} # shortest path matrices dict
for n1 in g1.nodes(data=True): for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True): for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn([n1[1]['attributes']],
[n2[1]['attributes']])
vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'],
n2[1]['attributes'])
# node unlabeled # node unlabeled
else: else:
vk_dict = {} vk_dict = {}
@@ -265,7 +264,7 @@ def structuralspkernel_do(Gn, splist, ds_attrs, node_label, edge_label,
for e1, e2 in product( for e1, e2 in product(
g1.edges(data=True), g2.edges(data=True)): g1.edges(data=True), g2.edges(data=True)):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label], ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
[e1[2]['attributes']], [e2[2]['attributes']])
e1[2]['attributes'], e2[2]['attributes'])
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
@@ -288,7 +287,7 @@ def structuralspkernel_do(Gn, splist, ds_attrs, node_label, edge_label,
ek_dict = {} ek_dict = {}
for e1 in g1.edges(data=True): for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True): for e2 in g2.edges(data=True):
ek_temp = kn([e1[2]['attributes']], [e2[2]['attributes']])
ek_temp = kn(e1[2]['attributes'], e2[2]['attributes'])
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
@@ -374,8 +373,6 @@ def structuralspkernel_do(Gn, splist, ds_attrs, node_label, edge_label,
# print("toto") # print("toto")
# pass # pass


if(kernel > 1):
print("kernel error : ", ij)
return iglobal, jglobal, kernel return iglobal, jglobal, kernel






+ 54
- 65
pygraph/utils/graphdataset.py View File

@@ -15,29 +15,29 @@ def get_dataset_attributes(Gn,
def get_dataset_size(Gn): def get_dataset_size(Gn):
return len(Gn) return len(Gn)


def get_all_graph_size(Gn):
def get_all_node_num(Gn):
return [nx.number_of_nodes(G) for G in Gn] return [nx.number_of_nodes(G) for G in Gn]


def get_ave_graph_size(all_graph_size):
return np.mean(all_graph_size)
def get_ave_node_num(all_node_num):
return np.mean(all_node_num)


def get_min_graph_size(all_graph_size):
return np.amin(all_graph_size)
def get_min_node_num(all_node_num):
return np.amin(all_node_num)


def get_max_graph_size(Gn):
return np.amax(all_graph_size)
def get_max_node_num(all_node_num):
return np.amax(all_node_num)


def get_all_graph_edge_num(Gn):
def get_all_edge_num(Gn):
return [nx.number_of_edges(G) for G in Gn] return [nx.number_of_edges(G) for G in Gn]


def get_ave_graph_edge_num(all_graph_edge_num):
return np.mean(all_graph_edge_num)
def get_ave_edge_num(all_edge_num):
return np.mean(all_edge_num)


def get_min_graph_edge_num(all_graph_edge_num):
return np.amin(all_graph_edge_num)
def get_min_edge_num(all_edge_num):
return np.amin(all_edge_num)


def get_max_graph_edge_num(all_graph_edge_num):
return np.amax(all_graph_edge_num)
def get_max_edge_num(all_edge_num):
return np.amax(all_edge_num)


def is_node_labeled(Gn): def is_node_labeled(Gn):
return False if node_label is None else True return False if node_label is None else True
@@ -60,13 +60,13 @@ def get_dataset_attributes(Gn,
def is_directed(Gn): def is_directed(Gn):
return nx.is_directed(Gn[0]) return nx.is_directed(Gn[0])


def get_ave_graph_degree(Gn):
def get_ave_node_degree(Gn):
return np.mean([np.amax(list(dict(G.degree()).values())) for G in Gn]) return np.mean([np.amax(list(dict(G.degree()).values())) for G in Gn])


def get_max_graph_degree(Gn):
def get_max_node_degree(Gn):
return np.amax([np.amax(list(dict(G.degree()).values())) for G in Gn]) return np.amax([np.amax(list(dict(G.degree()).values())) for G in Gn])


def get_min_graph_degree(Gn):
def get_min_node_degree(Gn):
return np.amin([np.amax(list(dict(G.degree()).values())) for G in Gn]) return np.amin([np.amax(list(dict(G.degree()).values())) for G in Gn])


def get_substructures(Gn): def get_substructures(Gn):
@@ -107,11 +107,11 @@ def get_dataset_attributes(Gn,
return len(set(target)) return len(set(target))


def get_node_attr_dim(Gn):
    """Return the dimension of the non-symbolic node attributes.

    Scans the graphs in order and returns the length of the first
    ``'attributes'`` entry found on any node, so a dataset in which some
    nodes (or whole graphs) lack attributes is still handled.

    Parameters
    ----------
    Gn : list of graphs
        Each graph must support ``G.nodes(data=True)`` yielding
        ``(node, attribute_dict)`` pairs.

    Returns
    -------
    int
        Length of the first node ``'attributes'`` value found, or 0 if no
        node in any graph has one (including when ``Gn`` is empty).
    """
    for G in Gn:
        for _, node_data in G.nodes(data=True):
            if 'attributes' in node_data:
                return len(node_data['attributes'])
    return 0


def get_edge_attr_dim(Gn): def get_edge_attr_dim(Gn):
for G in Gn: for G in Gn:
@@ -119,8 +119,6 @@ def get_dataset_attributes(Gn,
for e in G.edges(data=True): for e in G.edges(data=True):
if 'attributes' in e[2]: if 'attributes' in e[2]:
return len(e[2]['attributes']) return len(e[2]['attributes'])
else:
return 0
return 0 return 0


if attr_names == []: if attr_names == []:
@@ -130,15 +128,15 @@ def get_dataset_attributes(Gn,
'edge_labeled', 'edge_labeled',
'is_directed', 'is_directed',
'dataset_size', 'dataset_size',
'ave_graph_size',
'min_graph_size',
'max_graph_size',
'ave_graph_edge_num',
'min_graph_edge_num',
'max_graph_edge_num',
'ave_graph_degree',
'min_graph_degree',
'max_graph_degree',
'ave_node_num',
'min_node_num',
'max_node_num',
'ave_edge_num',
'min_edge_num',
'max_edge_num',
'ave_node_degree',
'min_node_degree',
'max_node_degree',
'node_label_num', 'node_label_num',
'edge_label_num', 'edge_label_num',
'node_attr_dim', 'node_attr_dim',
@@ -151,50 +149,41 @@ def get_dataset_attributes(Gn,


attrs.update({'dataset_size': get_dataset_size(Gn)}) attrs.update({'dataset_size': get_dataset_size(Gn)})


# graph size
# graph node number
if any(i in attr_names if any(i in attr_names
for i in ['ave_graph_size', 'min_graph_size', 'max_graph_size']):
for i in ['ave_node_num', 'min_node_num', 'max_node_num']):


all_graph_size = get_all_graph_size(Gn)
all_node_num = get_all_node_num(Gn)


if 'ave_graph_size' in attr_names:
if 'ave_node_num' in attr_names:


attrs.update({'ave_graph_size': get_ave_graph_size(all_graph_size)})
attrs.update({'ave_node_num': get_ave_node_num(all_node_num)})


if 'min_graph_size' in attr_names:
if 'min_node_num' in attr_names:


attrs.update({'min_graph_size': get_min_graph_size(all_graph_size)})
attrs.update({'min_node_num': get_min_node_num(all_node_num)})


if 'max_graph_size' in attr_names:
if 'max_node_num' in attr_names:


attrs.update({'max_graph_size': get_max_graph_size(all_graph_size)})
attrs.update({'max_node_num': get_max_node_num(all_node_num)})


# graph edge number # graph edge number
if any(i in attr_names for i in if any(i in attr_names for i in
['ave_graph_edge_num', 'min_graph_edge_num', 'max_graph_edge_num']):
['ave_edge_num', 'min_edge_num', 'max_edge_num']):


all_graph_edge_num = get_all_graph_edge_num(Gn)
all_edge_num = get_all_edge_num(Gn)


if 'ave_graph_edge_num' in attr_names:
if 'ave_edge_num' in attr_names:


attrs.update({
'ave_graph_edge_num':
get_ave_graph_edge_num(all_graph_edge_num)
})
attrs.update({'ave_edge_num': get_ave_edge_num(all_edge_num)})


if 'max_graph_edge_num' in attr_names:
if 'max_edge_num' in attr_names:


attrs.update({
'max_graph_edge_num':
get_max_graph_edge_num(all_graph_edge_num)
})
attrs.update({'max_edge_num': get_max_edge_num(all_edge_num)})


if 'min_graph_edge_num' in attr_names:
if 'min_edge_num' in attr_names:


attrs.update({
'min_graph_edge_num':
get_min_graph_edge_num(all_graph_edge_num)
})
attrs.update({'min_edge_num': get_min_edge_num(all_edge_num)})


# label number # label number
if any(i in attr_names for i in ['node_labeled', 'node_label_num']): if any(i in attr_names for i in ['node_labeled', 'node_label_num']):
@@ -222,14 +211,14 @@ def get_dataset_attributes(Gn,
if 'is_directed' in attr_names: if 'is_directed' in attr_names:
attrs.update({'is_directed': is_directed(Gn)}) attrs.update({'is_directed': is_directed(Gn)})


if 'ave_graph_degree' in attr_names:
attrs.update({'ave_graph_degree': get_ave_graph_degree(Gn)})
if 'ave_node_degree' in attr_names:
attrs.update({'ave_node_degree': get_ave_node_degree(Gn)})


if 'max_graph_degree' in attr_names:
attrs.update({'max_graph_degree': get_max_graph_degree(Gn)})
if 'max_node_degree' in attr_names:
attrs.update({'max_node_degree': get_max_node_degree(Gn)})


if 'min_graph_degree' in attr_names:
attrs.update({'min_graph_degree': get_min_graph_degree(Gn)})
if 'min_node_degree' in attr_names:
attrs.update({'min_node_degree': get_min_node_degree(Gn)})


if 'substructures' in attr_names: if 'substructures' in attr_names:
attrs.update({'substructures': get_substructures(Gn)}) attrs.update({'substructures': get_substructures(Gn)})


+ 37
- 4
pygraph/utils/kernels.py View File

@@ -1,6 +1,7 @@
"""Those who are not graph kernels. We can be kernels for nodes or edges! """Those who are not graph kernels. We can be kernels for nodes or edges!
These kernels are defined between pairs of vectors.
""" """
import numpy as np


def deltakernel(x, y): def deltakernel(x, y):
"""Delta kernel. Return 1 if x == y, 0 otherwise. """Delta kernel. Return 1 if x == y, 0 otherwise.
@@ -17,15 +18,47 @@ def deltakernel(x, y):


References References
---------- ----------
[1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
[1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between
labeled graphs. In Proceedings of the 20th International Conference on
Machine Learning, Washington, DC, United States, 2003.
""" """
return x == y #(1 if condition else 0) return x == y #(1 if condition else 0)




def gaussiankernel(x, y, gamma=None):
    """Gaussian (RBF) kernel between two attribute vectors.

    Computes

        K(x, y) = exp(-gamma ||x-y||^2)

    for a single pair of vectors x and y.

    Parameters
    ----------
    x : sequence of shape (n_features)
        Each item is converted with ``float()``, so numeric strings are
        accepted.

    y : sequence of shape (n_features)
        Same length as ``x``; converted the same way.

    gamma : float, default None
        If None, defaults to 1.0 / n_features.

    Returns
    -------
    kernel : float

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same number of features.
    """
    xt = np.array([float(itm) for itm in x])
    yt = np.array([float(itm) for itm in y])
    # Reject mismatched lengths explicitly; otherwise NumPy would silently
    # broadcast a length-1 vector against a longer one.
    if xt.shape != yt.shape:
        raise ValueError(
            'x and y must have the same number of features, got %d and %d'
            % (xt.size, yt.size))

    if gamma is None:
        # With zero features the squared distance is 0 and gamma is
        # irrelevant; avoid dividing by zero.
        gamma = 1.0 / xt.size if xt.size else 1.0

    sq_dist = np.sum((xt - yt) ** 2)
    return np.exp(-gamma * sq_dist)




def kernelsum(k1, k2, d11, d12, d21=None, d22=None, lamda1=1, lamda2=1): def kernelsum(k1, k2, d11, d12, d21=None, d22=None, lamda1=1, lamda2=1):


Loading…
Cancel
Save