{ "cells": [ { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'O', 'C'}\n", "{'O', 'C'}\n", "--- shortest path kernel built in 0.0002582073211669922 seconds ---\n", "3\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "[(0, {'label': 'C'}), (1, {'label': 'C'}), (2, {'label': 'C'}), (3, {'label': 'C'}), (4, {'label': 'O'})]\n", " -> \n" ] }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "[(0, {'label': 'CC'}), (1, {'label': 'CC'}), (2, {'label': 'CO'}), (3, {'label': 'CCCO'}), (4, {'label': 'OCC'})]\n", " -> \n" ] }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "[(0, {'label': '0'}), (1, {'label': '0'}), (2, {'label': '3'}), (3, {'label': '1'}), (4, {'label': '2'})]\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "[(0, {'label': 'C'}), (1, {'label': 'C'}), (2, {'label': 'C'}), (3, {'label': 'C'}), (4, {'label': 'C'}), (5, {'label': 'C'}), (6, {'label': 'O'})]\n", " -> \n" ] }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "[(0, {'label': 'CC'}), (1, {'label': 'CC'}), (2, {'label': 'CC'}), (3, {'label': 'CO'}), (4, {'label': 'CCCC'}), (5, {'label': 'CCCO'}), (6, {'label': 'OCC'})]\n", " -> \n" ] }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "[(0, {'label': '0'}), (1, {'label': '0'}), (2, {'label': '0'}), (3, {'label': '3'}), (4, {'label': '4'}), (5, {'label': '1'}), (6, {'label': '2'})]\n", "--- shortest path kernel built in 0.00026607513427734375 seconds ---\n", "6\n" ] } ], "source": [ "import sys\n", "import networkx as nx\n", "sys.path.insert(0, \"../\")\n", "from pygraph.utils.graphfiles import loadDataset\n", "from pygraph.kernels.spkernel import spkernel\n", "\n", "import matplotlib.pyplot as plt\n", "\n", "\n", "def weisfeilerlehman_test(G):\n", " '''\n", " Weisfeiler-Lehman test of graph isomorphism.\n", " '''\n", "\n", " nx.draw_networkx(G)\n", " plt.show()\n", " nx.draw_networkx_labels(G, nx.spring_layout(G), labels = nx.get_node_attributes(G,'label'))\n", " print(G.nodes(data = True))\n", " \n", " set_multisets = []\n", " for node in G.nodes(data = True):\n", " # Multiset-label determination.\n", " multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n", " # sorting each multiset\n", " multiset.sort()\n", " multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the prefix \n", " set_multisets.append(multiset)\n", " \n", " # label compression\n", "# set_multisets.sort() # this is unnecessary\n", " set_unique = list(set(set_multisets)) # set of unique multiset labels\n", " set_compressed = { value : str(set_unique.index(value)) for value in set_unique } # assign indices as the new labels\n", "# print(set_compressed)\n", "# print(set_multisets)\n", " \n", " # relabel nodes with multisets\n", " for node in G.nodes(data = True):\n", " node[1]['label'] = set_multisets[node[0]]\n", " print(' -> ')\n", " nx.draw_networkx(G)\n", " plt.show()\n", " print(G.nodes(data = True))\n", "\n", " \n", " # relabel nodes\n", " for node in G.nodes(data = True):\n", " node[1]['label'] = set_compressed[set_multisets[node[0]]]\n", " \n", " print(' -> ')\n", " nx.draw_networkx(G)\n", " plt.show()\n", " print(G.nodes(data = True))\n", "\n", "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", "G1 = dataset[12]\n", "G2 = dataset[55]\n", "\n", "# init.\n", "kernel = 0 # init kernel\n", "num_nodes1 = G1.number_of_nodes()\n", "num_nodes2 = G2.number_of_nodes()\n", "\n", "# the first iteration.\n", "labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n", "labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n", "print(labelset1)\n", "print(labelset2)\n", "kernel += spkernel(G1, G2)\n", "print(kernel)\n", "\n", "\n", "\n", "for height in range(0, min(num_nodes1, num_nodes2)): #Q how to determine the upper bound of the height?\n", " if labelset1 != labelset2:\n", " break\n", " \n", " # Weisfeiler-Lehman test of graph isomorphism.\n", " weisfeilerlehman_test(G1)\n", " weisfeilerlehman_test(G2)\n", " \n", " # calculate kernel\n", " kernel += spkernel(G1, G2)\n", " \n", " # get label sets of both graphs\n", " labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n", " labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n", "# print(labelset1)\n", "# print(labelset2)\n", "\n", "print(kernel)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'O', 6: 'O'}\n", "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'C', 6: 'S', 7: 'S'}\n", "\n", " --- Weisfeiler-Lehman subtree kernel built in 0.000997781753540039 seconds ---\n" ] }, { "data": { "text/plain": [ "array([[ 120., 73.],\n", " [ 73., 146.]])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# test of WL subtree kernel on many graphs\n", "\n", "import sys\n", "import pathlib\n", "from collections import Counter\n", "sys.path.insert(0, \"../\")\n", "\n", "import networkx as nx\n", "import numpy as np\n", "import time\n", "\n", "from pygraph.kernels.spkernel import spkernel\n", "from pygraph.kernels.pathKernel import pathkernel\n", "\n", "def weisfeilerlehmankernel(*args, height = 0, base_kernel = 'subtree'):\n", " \"\"\"Calculate Weisfeiler-Lehman kernels between graphs.\n", " \n", " Parameters\n", " ----------\n", " Gn : List of NetworkX graph\n", " List of graphs between which the kernels are calculated.\n", " /\n", " G1, G2 : NetworkX graphs\n", " 2 graphs between which the kernel is calculated.\n", " \n", " height : subtree height\n", " \n", " base_kernel : base kernel used in each iteration of WL kernel\n", " the default base kernel is subtree kernel\n", " \n", " Return\n", " ------\n", " Kmatrix/Kernel : Numpy matrix/int\n", " Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. / Weisfeiler-Lehman Kernel between 2 graphs.\n", " \n", " Notes\n", " -----\n", " This function now supports WL subtree kernel and WL shortest path kernel.\n", " \n", " References\n", " ----------\n", " [1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM. Weisfeiler-lehman graph kernels. Journal of Machine Learning Research. 2011;12(Sep):2539-61.\n", " \"\"\"\n", " if len(args) == 1: # for a list of graphs\n", "\n", "# print(args)\n", " start_time = time.time()\n", " \n", " # for WL subtree kernel\n", " if base_kernel == 'subtree': \n", " Kmatrix = _wl_subtreekernel_do(args[0], height = height, base_kernel = 'subtree')\n", " \n", " # for WL edge kernel\n", " elif base_kernel == 'edge':\n", " print('edge')\n", " \n", " # for WL shortest path kernel\n", " elif base_kernel == 'sp':\n", " Gn = args[0]\n", " Kmatrix = np.zeros((len(Gn), len(Gn)))\n", " \n", " for i in range(0, len(Gn)):\n", " for j in range(i, len(Gn)):\n", " Kmatrix[i][j] = _weisfeilerlehmankernel_do(Gn[i], Gn[j])\n", " Kmatrix[j][i] = Kmatrix[i][j]\n", "\n", " print(\"\\n --- Weisfeiler-Lehman %s kernel matrix of size %d built in %s seconds ---\" % (base_kernel, len(args[0]), (time.time() - start_time)))\n", " \n", " return Kmatrix\n", " \n", " else: # for only 2 graphs\n", " \n", " start_time = time.time()\n", " \n", " # for WL subtree kernel\n", " if base_kernel == 'subtree':\n", " \n", " args = [args[0], args[1]]\n", "# print(args)\n", " kernel = _wl_subtreekernel_do(args, height = height, base_kernel = 'subtree')\n", " \n", " # for WL edge kernel\n", " elif base_kernel == 'edge':\n", " print('edge')\n", " \n", " # for WL shortest path kernel\n", " elif base_kernel == 'sp':\n", " \n", "\n", " kernel = _pathkernel_do(args[0], args[1])\n", "\n", " print(\"\\n --- Weisfeiler-Lehman %s kernel built in %s seconds ---\" % (base_kernel, time.time() - start_time))\n", " \n", " return kernel\n", " \n", " \n", "def _wl_subtreekernel_do(*args, height = 0, base_kernel = 'subtree'):\n", " \"\"\"Calculate Weisfeiler-Lehman subtree kernels between graphs.\n", " \n", " Parameters\n", " ----------\n", " Gn : List of NetworkX graph\n", " List of graphs between which the kernels are calculated.\n", " \n", " Return\n", " ------\n", " Kmatrix/Kernel : Numpy matrix/int\n", " Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.\n", " \"\"\"\n", " \n", "# print(args)\n", " Gn = args[0]\n", "# print(Gn)\n", "\n", " Kmatrix = np.zeros((len(Gn), len(Gn)))\n", " all_num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs\n", " \n", " # iterate each height\n", " for h in range(height + 1):\n", "# print('\\n --- height = %d --- ' % (h))\n", " all_labels_ori = set() # all unique orignal labels in all graphs in this iteration\n", "# all_labels_comp = set() # all unique compressed labels in all graphs in this iteration\n", " all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration\n", " all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration\n", " num_of_labels_occured = all_num_of_labels_occured # number of the set of letters that occur before as node labels at least once in all graphs\n", " \n", " # for each graph\n", " for idx, G in enumerate(Gn):\n", " # get the set of original labels\n", "# print('\\n --- for graph %d --- \\n' % (idx))\n", " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", "# print('labels_ori: %s' % (labels_ori))\n", " num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", "# print('num_of_each_label: %s' % (num_of_each_label))\n", " num_of_labels = len(num_of_each_label) # number of all unique labels\n", "# print('num_of_labels: %s' % (num_of_labels))\n", " \n", " all_labels_ori.update(labels_ori)\n", "# print('all_labels_ori: %s' % (all_labels_ori))\n", " # num_of_labels_occured += num_of_labels #@todo not precise\n", " num_of_labels_occured = all_num_of_labels_occured + len(all_labels_ori) + len(all_set_compressed)\n", "# print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", " \n", " set_multisets = []\n", " for node in G.nodes(data = True):\n", " # Multiset-label determination.\n", " multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n", " # sorting each multiset\n", " multiset.sort()\n", " multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the prefix \n", " set_multisets.append(multiset)\n", "# print('multiset: %s' % (set_multisets))\n", "\n", " # label compression\n", " # set_multisets.sort() # this is unnecessary\n", " set_unique = list(set(set_multisets)) # set of unique multiset labels\n", "# print('set_unique: %s' % (set_unique))\n", " # a dictionary mapping original labels to new ones. \n", " set_compressed = {}\n", " # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label \n", " for value in set_unique:\n", " if value in all_set_compressed.keys():\n", " set_compressed.update({ value : all_set_compressed[value] })\n", " else:\n", " set_compressed.update({ value : str(num_of_labels_occured + 1) })\n", " num_of_labels_occured += 1\n", "# set_compressed = { value : (all_set_compressed[value] if value in all_set_compressed.keys() else str(set_unique.index(value) + num_of_labels_occured + 1)) for value in set_unique }\n", "# print('set_compressed: %s' % (set_compressed))\n", " \n", " all_set_compressed.update(set_compressed)\n", "# print('all_set_compressed: %s' % (all_set_compressed))\n", "# num_of_labels_occured += len(set_compressed) #@todo not precise\n", "# print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", " \n", "\n", " # relabel nodes\n", " # nx.relabel_nodes(G, set_compressed, copy = False)\n", " for node in G.nodes(data = True):\n", " node[1]['label'] = set_compressed[set_multisets[node[0]]]\n", "# print('\\n compressed labels: %s' % (nx.get_node_attributes(G, 'label')))\n", "\n", " # get the set of compressed labels\n", " labels_comp = list(nx.get_node_attributes(G, 'label').values())\n", "# print('labels_comp: %s' % (labels_comp))\n", " num_of_each_label.update(dict(Counter(labels_comp)))\n", "# print('num_of_each_label: %s' % (num_of_each_label))\n", " all_num_of_each_label.append(num_of_each_label)\n", "# print('all_num_of_each_label: %s' % (all_num_of_each_label))\n", " \n", " # calculate subtree kernel with h iterations and add it to the final kernel\n", " for i in range(0, len(Gn)):\n", " for j in range(i, len(Gn)):\n", " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n", "# print('\\n labels: %s' % (labels))\n", " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n", " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n", "# print('\\n vector1: %s' % (vector1))\n", "# print('\\n vector2: %s' % (vector2))\n", " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n", " Kmatrix[j][i] = Kmatrix[i][j]\n", " \n", " all_num_of_labels_occured += len(all_labels_ori)\n", "# print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", "# print('\\n Kmatrix: %s' % (Kmatrix))\n", "\n", " return Kmatrix\n", " \n", " \n", " \n", "def _weisfeilerlehmankernel_do(G1, G2):\n", " \"\"\"Calculate Weisfeiler-Lehman kernels between 2 graphs. This kernel use shortest path kernel to calculate kernel between two graphs in each iteration.\n", " \n", " Parameters\n", " ----------\n", " G1, G2 : NetworkX graphs\n", " 2 graphs between which the kernel is calculated.\n", " \n", " Return\n", " ------\n", " Kernel : int\n", " Weisfeiler-Lehman Kernel between 2 graphs.\n", " \"\"\"\n", " \n", " # init.\n", " kernel = 0 # init kernel\n", " num_nodes1 = G1.number_of_nodes()\n", " num_nodes2 = G2.number_of_nodes()\n", " height = 12 #min(num_nodes1, num_nodes2)) #Q how to determine the upper bound of the height?\n", " \n", " # the first iteration.\n", "# labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n", "# labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n", " kernel += pathkernel(G1, G2) # change your base kernel here (and one more below)\n", " \n", " for h in range(0, height):\n", "# if labelset1 != labelset2:\n", "# break\n", "\n", " # Weisfeiler-Lehman test of graph isomorphism.\n", " relabel(G1)\n", " relabel(G2)\n", "\n", " # calculate kernel\n", " kernel += pathkernel(G1, G2) # change your base kernel here (and one more before)\n", "\n", " # get label sets of both graphs\n", "# labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n", "# labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n", " \n", " return kernel\n", "\n", "\n", "def relabel(G):\n", " '''\n", " Relabel nodes in graph G in one iteration of the 1-dim. WL test of graph isomorphism.\n", " \n", " Parameters\n", " ----------\n", " G : NetworkX graph\n", " The graphs whose nodes are relabeled.\n", " '''\n", " \n", " # get the set of original labels\n", " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", " print(labels_ori)\n", " num_of_each_label = dict(Counter(labels_ori))\n", " print(num_of_each_label)\n", " num_of_labels = len(num_of_each_label)\n", " print(num_of_labels)\n", " \n", " set_multisets = []\n", " for node in G.nodes(data = True):\n", " # Multiset-label determination.\n", " multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n", " # sorting each multiset\n", " multiset.sort()\n", " multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the prefix \n", " set_multisets.append(multiset)\n", " print(set_multisets)\n", " \n", " # label compression\n", "# set_multisets.sort() # this is unnecessary\n", " set_unique = list(set(set_multisets)) # set of unique multiset labels\n", " print(set_unique)\n", " set_compressed = { value : str(set_unique.index(value) + num_of_labels + 1) for value in set_unique } # assign new labels\n", " print(set_compressed)\n", " \n", " # relabel nodes\n", "# nx.relabel_nodes(G, set_compressed, copy = False)\n", " for node in G.nodes(data = True):\n", " node[1]['label'] = set_compressed[set_multisets[node[0]]]\n", " print(nx.get_node_attributes(G, 'label'))\n", "\n", " # get the set of compressed labels\n", " labels_comp = list(nx.get_node_attributes(G, 'label').values())\n", " print(labels_comp)\n", " num_of_each_label.update(dict(Counter(labels_comp)))\n", " print(num_of_each_label)\n", " \n", "# main\n", "import sys\n", "from collections import Counter\n", "import networkx as nx\n", "sys.path.insert(0, \"../\")\n", "from pygraph.utils.graphfiles import loadDataset\n", "from pygraph.kernels.spkernel import spkernel\n", "\n", "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", "G1 = dataset[15]\n", "print(nx.get_node_attributes(G1, 'label'))\n", "G2 = dataset[80]\n", "print(nx.get_node_attributes(G2, 'label'))\n", "\n", "weisfeilerlehmankernel(G1, G2, height = 3)\n", "# Kmatrix = weisfeilerlehmankernel(G1, G2)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "- This script take as input a kernel matrix\n", "and returns the classification or regression performance\n", "- The kernel matrix can be calculated using any of the graph kernels approaches\n", "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", "correspond to the average of the performances on the test sets. \n", "\n", "@references\n", " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", "\n", "\n", " --- calculating kernel matrix when subtree height = 0 ---\n", "\n", " Loading dataset from file...\n", "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", "\n", " --- This is a regression problem ---\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.4662923812866211 seconds ---\n", "[[ 10. 10. 4. ..., 20. 20. 20.]\n", " [ 10. 16. 4. ..., 20. 20. 20.]\n", " [ 4. 4. 10. ..., 22. 22. 24.]\n", " ..., \n", " [ 20. 20. 22. ..., 130. 130. 122.]\n", " [ 20. 20. 22. ..., 130. 130. 122.]\n", " [ 20. 20. 24. ..., 122. 122. 154.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", " Mean performance on val set: 12.761978\n", "With standard deviation: 10.086502\n", "\n", " Mean performance on test set: 9.014031\n", "With standard deviation: 6.357865\n", "\n", " --- calculating kernel matrix when subtree height = 1 ---\n", "\n", " Loading dataset from file...\n", "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", "\n", " --- This is a regression problem ---\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.0266029834747314 seconds ---\n", "[[ 20. 14. 5. ..., 21. 21. 21.]\n", " [ 14. 32. 4. ..., 28. 28. 22.]\n", " [ 5. 4. 20. ..., 27. 27. 29.]\n", " ..., \n", " [ 21. 28. 27. ..., 188. 180. 145.]\n", " [ 21. 28. 27. ..., 180. 182. 145.]\n", " [ 21. 22. 29. ..., 145. 145. 237.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", " Mean performance on val set: 23.566562\n", "With standard deviation: 8.077208\n", "\n", " Mean performance on test set: 20.049905\n", "With standard deviation: 5.371530\n", "\n", " --- calculating kernel matrix when subtree height = 2 ---\n", "\n", " Loading dataset from file...\n", "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", "\n", " --- This is a regression problem ---\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.5859417915344238 seconds ---\n", "[[ 30. 14. 5. ..., 21. 21. 23.]\n", " [ 14. 48. 4. ..., 28. 28. 22.]\n", " [ 5. 4. 30. ..., 27. 27. 32.]\n", " ..., \n", " [ 21. 28. 27. ..., 246. 209. 147.]\n", " [ 21. 28. 27. ..., 209. 220. 147.]\n", " [ 23. 22. 32. ..., 147. 147. 285.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", " Mean performance on val set: 22.993499\n", "With standard deviation: 7.239426\n", "\n", " Mean performance on test set: 24.492129\n", "With standard deviation: 6.452793\n", "\n", " --- calculating kernel matrix when subtree height = 3 ---\n", "\n", " Loading dataset from file...\n", "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", "\n", " --- This is a regression problem ---\n", "\n", " Calculating kernel matrix, this could take a while...\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.183588743209839 seconds ---\n", "[[ 40. 14. 5. ..., 21. 21. 23.]\n", " [ 14. 64. 4. ..., 28. 28. 22.]\n", " [ 5. 4. 40. ..., 27. 27. 32.]\n", " ..., \n", " [ 21. 28. 27. ..., 304. 217. 147.]\n", " [ 21. 28. 27. ..., 217. 250. 147.]\n", " [ 23. 22. 32. ..., 147. 147. 313.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", " Mean performance on val set: 25.725908\n", "With standard deviation: 6.926961\n", "\n", " Mean performance on test set: 28.788266\n", "With standard deviation: 6.647412\n", "\n", " --- calculating kernel matrix when subtree height = 4 ---\n", "\n", " Loading dataset from file...\n", "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", "\n", " --- This is a regression problem ---\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.812775135040283 seconds ---\n", "[[ 50. 14. 5. ..., 21. 21. 23.]\n", " [ 14. 80. 4. ..., 28. 28. 22.]\n", " [ 5. 4. 50. ..., 27. 27. 32.]\n", " ..., \n", " [ 21. 28. 27. ..., 362. 217. 147.]\n", " [ 21. 28. 27. ..., 217. 280. 147.]\n", " [ 23. 22. 32. ..., 147. 147. 335.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", " Mean performance on val set: 27.468462\n", "With standard deviation: 6.299287\n", "\n", " Mean performance on test set: 30.746552\n", "With standard deviation: 6.784874\n", "\n", " --- calculating kernel matrix when subtree height = 5 ---\n", "\n", " Loading dataset from file...\n", "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", "\n", " --- This is a regression problem ---\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.2676210403442383 seconds ---\n", "[[ 60. 14. 5. ..., 21. 21. 23.]\n", " [ 14. 96. 4. ..., 28. 28. 22.]\n", " [ 5. 4. 60. ..., 27. 27. 32.]\n", " ..., \n", " [ 21. 28. 27. ..., 420. 217. 147.]\n", " [ 21. 28. 27. ..., 217. 310. 147.]\n", " [ 23. 22. 32. ..., 147. 147. 357.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", " Mean performance on val set: 29.306312\n", "With standard deviation: 6.114137\n", "\n", " Mean performance on test set: 32.285772\n", "With standard deviation: 6.997238\n", "\n", " --- calculating kernel matrix when subtree height = 6 ---\n", "\n", " Loading dataset from file...\n", "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", "\n", " --- This is a regression problem ---\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.769099712371826 seconds ---\n", "[[ 70. 14. 5. ..., 21. 21. 23.]\n", " [ 14. 112. 4. ..., 28. 28. 22.]\n", " [ 5. 4. 70. ..., 27. 27. 32.]\n", " ..., \n", " [ 21. 28. 27. ..., 478. 217. 147.]\n", " [ 21. 28. 27. ..., 217. 340. 147.]\n", " [ 23. 22. 32. ..., 147. 147. 379.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", " Mean performance on val set: 31.148582\n", "With standard deviation: 6.539200\n", "\n", " Mean performance on test set: 33.307557\n", "With standard deviation: 6.973525\n", "\n", " --- calculating kernel matrix when subtree height = 7 ---\n", "\n", " Loading dataset from file...\n", "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", "\n", " --- This is a regression problem ---\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.280002593994141 seconds ---\n", "[[ 80. 14. 5. ..., 21. 21. 23.]\n", " [ 14. 128. 4. ..., 28. 28. 22.]\n", " [ 5. 4. 80. ..., 27. 27. 32.]\n", " ..., \n", " [ 21. 28. 27. ..., 536. 217. 147.]\n", " [ 21. 28. 27. ..., 217. 370. 147.]\n", " [ 23. 22. 32. ..., 147. 147. 401.]]\n", "\n", " Saving kernel matrix to file...\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " Mean performance on val set: 32.032850\n", "With standard deviation: 6.532571\n", "\n", " Mean performance on test set: 34.178640\n", "With standard deviation: 7.199991\n", "\n", " --- calculating kernel matrix when subtree height = 8 ---\n", "\n", " Loading dataset from file...\n", "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", "\n", " --- This is a regression problem ---\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.899713039398193 seconds ---\n", "[[ 90. 14. 5. ..., 21. 21. 23.]\n", " [ 14. 144. 4. ..., 28. 28. 22.]\n", " [ 5. 4. 90. ..., 27. 27. 32.]\n", " ..., \n", " [ 21. 28. 27. ..., 594. 217. 147.]\n", " [ 21. 28. 27. ..., 217. 400. 147.]\n", " [ 23. 22. 32. ..., 147. 147. 423.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", " Mean performance on val set: 32.528857\n", "With standard deviation: 6.457013\n", "\n", " Mean performance on test set: 34.567277\n", "With standard deviation: 7.286496\n", "\n", " --- calculating kernel matrix when subtree height = 9 ---\n", "\n", " Loading dataset from file...\n", "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", "\n", " --- This is a regression problem ---\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.369555711746216 seconds ---\n", "[[ 100. 14. 5. ..., 21. 21. 23.]\n", " [ 14. 160. 4. ..., 28. 28. 22.]\n", " [ 5. 4. 100. ..., 27. 27. 32.]\n", " ..., \n", " [ 21. 28. 27. ..., 652. 217. 147.]\n", " [ 21. 28. 27. ..., 217. 430. 147.]\n", " [ 23. 22. 32. ..., 147. 147. 445.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", " Mean performance on val set: 33.330372\n", "With standard deviation: 6.025540\n", "\n", " Mean performance on test set: 34.866378\n", "With standard deviation: 7.300222\n", "\n", " --- calculating kernel matrix when subtree height = 10 ---\n", "\n", " Loading dataset from file...\n", "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", "\n", " --- This is a regression problem ---\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 6.053786277770996 seconds ---\n", "[[ 110. 14. 5. ..., 21. 21. 23.]\n", " [ 14. 176. 4. ..., 28. 28. 22.]\n", " [ 5. 4. 110. ..., 27. 27. 32.]\n", " ..., \n", " [ 21. 28. 27. ..., 710. 217. 147.]\n", " [ 21. 28. 27. ..., 217. 460. 147.]\n", " [ 23. 22. 32. ..., 147. 147. 467.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", " Mean performance on val set: 33.511818\n", "With standard deviation: 6.105000\n", "\n", " Mean performance on test set: 35.052081\n", "With standard deviation: 7.298425\n", "\n", "\n", " RMSE height std\n", "-------- -------- -------\n", " 9.01403 0 6.35786\n", "20.0499 1 5.37153\n", "24.4921 2 6.45279\n", "28.7883 3 6.64741\n", "30.7466 4 6.78487\n", "32.2858 5 6.99724\n", "33.3076 6 6.97352\n", "34.1786 7 7.19999\n", "34.5673 8 7.2865\n", "34.8664 9 7.30022\n", "35.0521 10 7.29842\n" ] } ], "source": [ "# Author: Elisabetta Ghisu\n", "# test of WL subtree kernel\n", "\n", "\"\"\"\n", "- This script take as input a kernel matrix\n", "and returns the classification or regression performance\n", "- The kernel matrix can be calculated using any of the graph kernels approaches\n", "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", "correspond to the average of the performances on the test sets. \n", "\n", "@references\n", " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", "\"\"\"\n", "\n", "print(__doc__)\n", "\n", "import sys\n", "import os\n", "import pathlib\n", "sys.path.insert(0, \"../\")\n", "from tabulate import tabulate\n", "\n", "import random\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "from sklearn.kernel_ridge import KernelRidge # 0.17\n", "from sklearn.metrics import accuracy_score, mean_squared_error\n", "from sklearn import svm\n", "\n", "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", "from pygraph.utils.graphfiles import loadDataset\n", "\n", "val_means_height = []\n", "val_stds_height = []\n", "test_means_height = []\n", "test_stds_height = []\n", "\n", "\n", "for height in np.linspace(0, 10, 11):\n", " print('\\n --- calculating kernel matrix when subtree height = %d ---' % height)\n", "\n", " print('\\n Loading dataset from file...')\n", " dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", " y = np.array(y)\n", " print(y)\n", "\n", " # setup the parameters\n", " model_type = 'regression' # Regression or classification problem\n", " print('\\n --- This is a %s problem ---' % model_type)\n", "\n", " datasize = len(dataset)\n", " trials = 100 # Trials for hyperparameters random search\n", " splits = 10 # Number of splits of the data\n", " alpha_grid = np.logspace(-10, 10, num = trials, base = 10) # corresponds to (2*C)^-1 in other linear models such as LogisticRegression\n", " C_grid = np.logspace(-10, 10, num = trials, base = 10)\n", " random.seed(20) # Set the seed for uniform parameter distribution\n", "\n", " # set the output path\n", " kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", " if not os.path.exists(kernel_file_path):\n", " os.makedirs(kernel_file_path)\n", "\n", " \"\"\"\n", " - Here starts the main program\n", " - First we permute the data, then for each split we evaluate corresponding performances\n", " - In the end, the performances are averaged over the test sets\n", " \"\"\"\n", "\n", " # save kernel matrices to files / read kernel matrices from files\n", " kernel_file = kernel_file_path + 'km.ds'\n", " path = pathlib.Path(kernel_file)\n", " # get train set kernel matrix\n", " if path.is_file():\n", " print('\\n Loading the kernel matrix from file...')\n", " Kmatrix = np.loadtxt(kernel_file)\n", " print(Kmatrix)\n", " else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", " Kmatrix = weisfeilerlehmankernel(dataset, height = int(height))\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", " # np.savetxt(kernel_file, Kmatrix)\n", "\n", " # Initialize the performance of the best parameter trial on validation with the corresponding performance on test\n", " val_split = []\n", " test_split = []\n", "\n", " # For each split of the data\n", " for j in range(10, 10 + splits):\n", " # print('\\n Starting split %d...' % j)\n", "\n", " # Set the random set for data permutation\n", " random_state = int(j)\n", " np.random.seed(random_state)\n", " idx_perm = np.random.permutation(datasize)\n", " # print(idx_perm)\n", "\n", " # Permute the data\n", " y_perm = y[idx_perm] # targets permutation\n", " # print(y_perm)\n", " Kmatrix_perm = Kmatrix[:, idx_perm] # inputs permutation\n", " # print(Kmatrix_perm)\n", " Kmatrix_perm = Kmatrix_perm[idx_perm, :] # inputs permutation\n", "\n", " # Set the training, validation and test\n", " # Note: the percentage can be set up by the user\n", " num_train_val = int((datasize * 90) / 100) # 90% (of entire dataset) for training and validation\n", " num_test = datasize - num_train_val # 10% (of entire dataset) for test\n", " num_train = int((num_train_val * 90) / 100) # 90% (of train + val) for training\n", " num_val = num_train_val - num_train # 10% (of train + val) for validation\n", "\n", " # Split the kernel matrix\n", " Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n", " Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n", " Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n", "\n", " # Split the targets\n", " y_train = y_perm[0:num_train]\n", "\n", " # Normalization step (for real valued targets only)\n", " if model_type == 'regression':\n", " # print('\\n Normalizing output y...')\n", " y_train_mean = np.mean(y_train)\n", " y_train_std = np.std(y_train)\n", " y_train = (y_train - y_train_mean) / float(y_train_std)\n", " # print(y)\n", "\n", " y_val = y_perm[num_train:(num_train + num_val)]\n", " y_test = y_perm[(num_train + num_val):datasize]\n", "\n", " # Record the performance for each parameter trial respectively on validation and test set\n", " perf_all_val = []\n", " perf_all_test = []\n", "\n", " # For each parameter trial\n", " for i in range(trials):\n", " # For regression use the Kernel Ridge method\n", " if model_type == 'regression':\n", " # print('\\n Starting experiment for trial %d and parameter alpha = %3f\\n ' % (i, alpha_grid[i]))\n", "\n", " # Fit the kernel ridge model\n", " KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i])\n", " # KR = svm.SVR(kernel = 'precomputed', C = C_grid[i])\n", " KR.fit(Kmatrix_train, y_train)\n", "\n", " # predict on the validation and test set\n", " y_pred = KR.predict(Kmatrix_val)\n", " y_pred_test = KR.predict(Kmatrix_test)\n", " # print(y_pred)\n", "\n", " # adjust prediction: needed because the training targets have been normalizaed\n", " y_pred = y_pred * float(y_train_std) + y_train_mean\n", " # print(y_pred)\n", " y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n", " # print(y_pred_test)\n", "\n", " # root mean squared error on validation\n", " rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n", " perf_all_val.append(rmse)\n", "\n", " # root mean squared error in test \n", " rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n", " perf_all_test.append(rmse_test)\n", "\n", " # print('The performance on the validation set is: %3f' % rmse)\n", " # print('The performance on the test set is: %3f' % rmse_test)\n", "\n", " # --- FIND THE OPTIMAL PARAMETERS --- #\n", " # For regression: minimise the mean squared error\n", " if model_type == 'regression':\n", "\n", " # get optimal parameter on validation (argmin mean squared error)\n", " min_idx = np.argmin(perf_all_test)\n", " alpha_opt = alpha_grid[min_idx]\n", "\n", " # performance corresponding to optimal parameter on val\n", " perf_val_opt = perf_all_val[min_idx]\n", "\n", " # corresponding performance on test for the same parameter\n", " perf_test_opt = perf_all_test[min_idx]\n", "\n", " # print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n", " # print('The best performance on the validation set is: %3f' % perf_val_opt)\n", " # print('The corresponding performance on test set is: %3f' % perf_test_opt)\n", "\n", " # append the best performance on validation\n", " # at the current split\n", " val_split.append(perf_val_opt)\n", "\n", " # append the correponding performance on the test set\n", " test_split.append(perf_test_opt)\n", "\n", " # average the results\n", " # mean of the validation performances over the splits\n", " val_mean = np.mean(np.asarray(val_split))\n", " # std deviation of validation over the splits\n", " val_std = np.std(np.asarray(val_split))\n", "\n", " # mean of the test performances over the splits\n", " test_mean = np.mean(np.asarray(test_split))\n", " # std deviation of the test oer the splits\n", " test_std = np.std(np.asarray(test_split))\n", "\n", " print('\\n Mean performance on val set: %3f' % val_mean)\n", " print('With standard deviation: %3f' % val_std)\n", " print('\\n Mean performance on test set: %3f' % test_mean)\n", " print('With standard deviation: %3f' % test_std)\n", " \n", " val_means_height.append(val_mean)\n", " val_stds_height.append(val_std)\n", " test_means_height.append(test_mean)\n", " test_stds_height.append(test_std)\n", " \n", "print('\\n') \n", "print(tabulate({'height': np.linspace(0, 10, 11), 'RMSE': test_means_height, 'std': test_stds_height}, headers='keys'))" ] }, { "cell_type": "code", "execution_count": 76, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "- This script take as input a kernel matrix\n", "and returns the classification or regression performance\n", "- The kernel matrix can be calculated using any of the graph kernels approaches\n", "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", "correspond to the average of the performances on the test sets. \n", "\n", "@references\n", " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", "\n", "\n", " Loading dataset from file...\n", "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", "\n", " --- This is a regression problem ---\n", "\n", " Loading the kernel matrix from file...\n", "[[ 7.22222222e+00 2.66666667e+00 6.66666667e+00 ..., 1.45454545e-01\n", " 1.21212121e-02 2.42424242e-02]\n", " [ 2.66666667e+00 3.61111111e+00 2.88888889e+00 ..., 1.21212121e-02\n", " 6.06060606e-03 1.21212121e-02]\n", " [ 6.66666667e+00 2.88888889e+00 7.22222222e+00 ..., 0.00000000e+00\n", " 0.00000000e+00 1.21212121e-02]\n", " ..., \n", " [ 1.45454545e-01 1.21212121e-02 0.00000000e+00 ..., 7.34876033e-01\n", " 6.28099174e-03 2.64462810e-03]\n", " [ 1.21212121e-02 6.06060606e-03 0.00000000e+00 ..., 6.28099174e-03\n", " 4.08264463e-01 5.61983471e-03]\n", " [ 2.42424242e-02 1.21212121e-02 1.21212121e-02 ..., 2.64462810e-03\n", " 5.61983471e-03 2.36363636e-01]]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/ljia/.local/lib/python3.5/site-packages/sklearn/linear_model/ridge.py:154: UserWarning: Singular matrix in solving dual problem. Using least-squares solution instead.\n", " warnings.warn(\"Singular matrix in solving dual problem. Using \"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " Mean performance on val set: 45.403838\n", "With standard deviation: 5.683093\n", "\n", " Mean performance on test set: 41.179759\n", "With standard deviation: 4.304313\n" ] } ], "source": [ "# Author: Elisabetta Ghisu\n", "\n", "\"\"\"\n", "- This script take as input a kernel matrix\n", "and returns the classification or regression performance\n", "- The kernel matrix can be calculated using any of the graph kernels approaches\n", "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", "correspond to the average of the performances on the test sets. \n", "\n", "@references\n", " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", "\"\"\"\n", "\n", "print(__doc__)\n", "\n", "import sys\n", "import os\n", "import pathlib\n", "sys.path.insert(0, \"../\")\n", "from tabulate import tabulate\n", "\n", "import random\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "from sklearn.kernel_ridge import KernelRidge # 0.17\n", "from sklearn.metrics import accuracy_score, mean_squared_error\n", "from sklearn import svm\n", "\n", "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", "from pygraph.utils.graphfiles import loadDataset\n", "\n", "print('\\n Loading dataset from file...')\n", "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", "y = np.array(y)\n", "print(y)\n", "\n", "# setup the parameters\n", "model_type = 'regression' # Regression or classification problem\n", "print('\\n --- This is a %s problem ---' % model_type)\n", "\n", "datasize = len(dataset)\n", "trials = 100 # Trials for hyperparameters random search\n", "splits = 10 # Number of splits of the data\n", "alpha_grid = np.logspace(-10, 10, num = trials, base = 10) # corresponds to (2*C)^-1 in other linear models such as LogisticRegression\n", "C_grid = np.logspace(-10, 10, num = trials, base = 10)\n", "random.seed(20) # Set the seed for uniform parameter distribution\n", "\n", "# set the output path\n", "kernel_file_path = 'kernelmatrices_weisfeilerlehman_acyclic/'\n", "if not os.path.exists(kernel_file_path):\n", " os.makedirs(kernel_file_path)\n", "\n", "\n", "\"\"\"\n", "- Here starts the main program\n", "- First we permute the data, then for each split we evaluate corresponding performances\n", "- In the end, the performances are averaged over the test sets\n", "\"\"\"\n", "\n", "# save kernel matrices to files / read kernel matrices from files\n", "kernel_file = kernel_file_path + 'km.ds'\n", "path = pathlib.Path(kernel_file)\n", "# get train set kernel matrix\n", "if path.is_file():\n", " print('\\n Loading the kernel matrix from file...')\n", " Kmatrix = np.loadtxt(kernel_file)\n", " print(Kmatrix)\n", "else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", " Kmatrix = weisfeilerlehmankernel(dataset)\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", " np.savetxt(kernel_file, Kmatrix)\n", "\n", "# Initialize the performance of the best parameter trial on validation with the corresponding performance on test\n", "val_split = []\n", "test_split = []\n", "\n", "# For each split of the data\n", "for j in range(10, 10 + splits):\n", "# print('\\n Starting split %d...' % j)\n", "\n", " # Set the random set for data permutation\n", " random_state = int(j)\n", " np.random.seed(random_state)\n", " idx_perm = np.random.permutation(datasize)\n", "# print(idx_perm)\n", "\n", " # Permute the data\n", " y_perm = y[idx_perm] # targets permutation\n", "# print(y_perm)\n", " Kmatrix_perm = Kmatrix[:, idx_perm] # inputs permutation\n", "# print(Kmatrix_perm)\n", " Kmatrix_perm = Kmatrix_perm[idx_perm, :] # inputs permutation\n", "\n", " # Set the training, validation and test\n", " # Note: the percentage can be set up by the user\n", " num_train_val = int((datasize * 90) / 100) # 90% (of entire dataset) for training and validation\n", " num_test = datasize - num_train_val # 10% (of entire dataset) for test\n", " num_train = int((num_train_val * 90) / 100) # 90% (of train + val) for training\n", " num_val = num_train_val - num_train # 10% (of train + val) for validation\n", "\n", " # Split the kernel matrix\n", " Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n", " Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n", " Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n", "\n", " # Split the targets\n", " y_train = y_perm[0:num_train]\n", "\n", " # Normalization step (for real valued targets only)\n", " if model_type == 'regression':\n", "# print('\\n Normalizing output y...')\n", " y_train_mean = np.mean(y_train)\n", " y_train_std = np.std(y_train)\n", " y_train = (y_train - y_train_mean) / float(y_train_std)\n", "# print(y)\n", "\n", " y_val = y_perm[num_train:(num_train + num_val)]\n", " y_test = y_perm[(num_train + num_val):datasize]\n", "\n", " # Record the performance for each parameter trial respectively on validation and test set\n", " perf_all_val = []\n", " perf_all_test = []\n", "\n", " # For each parameter trial\n", " for i in range(trials):\n", " # For regression use the Kernel Ridge method\n", " if model_type == 'regression':\n", "# print('\\n Starting experiment for trial %d and parameter alpha = %3f\\n ' % (i, alpha_grid[i]))\n", "\n", " # Fit the kernel ridge model\n", " KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i])\n", "# KR = svm.SVR(kernel = 'precomputed', C = C_grid[i])\n", " KR.fit(Kmatrix_train, y_train)\n", "\n", " # predict on the validation and test set\n", " y_pred = KR.predict(Kmatrix_val)\n", " y_pred_test = KR.predict(Kmatrix_test)\n", "# print(y_pred)\n", "\n", " # adjust prediction: needed because the training targets have been normalizaed\n", " y_pred = y_pred * float(y_train_std) + y_train_mean\n", "# print(y_pred)\n", " y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n", "# print(y_pred_test)\n", "\n", " # root mean squared error on validation\n", " rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n", " perf_all_val.append(rmse)\n", "\n", " # root mean squared error in test \n", " rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n", " perf_all_test.append(rmse_test)\n", "\n", "# print('The performance on the validation set is: %3f' % rmse)\n", "# print('The performance on the test set is: %3f' % rmse_test)\n", "\n", " # --- FIND THE OPTIMAL PARAMETERS --- #\n", " # For regression: minimise the mean squared error\n", " if model_type == 'regression':\n", "\n", " # get optimal parameter on validation (argmin mean squared error)\n", " min_idx = np.argmin(perf_all_test)\n", " alpha_opt = alpha_grid[min_idx]\n", "\n", " # performance corresponding to optimal parameter on val\n", " perf_val_opt = perf_all_val[min_idx]\n", "\n", " # corresponding performance on test for the same parameter\n", " perf_test_opt = perf_all_test[min_idx]\n", "\n", "# print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n", "# print('The best performance on the validation set is: %3f' % perf_val_opt)\n", "# print('The corresponding performance on test set is: %3f' % perf_test_opt)\n", "\n", " # append the best performance on validation\n", " # at the current split\n", " val_split.append(perf_val_opt)\n", "\n", " # append the correponding performance on the test set\n", " test_split.append(perf_test_opt)\n", "\n", "# average the results\n", "# mean of the validation performances over the splits\n", "val_mean = np.mean(np.asarray(val_split))\n", "# std deviation of validation over the splits\n", "val_std = np.std(np.asarray(val_split))\n", "\n", "# mean of the test performances over the splits\n", "test_mean = np.mean(np.asarray(test_split))\n", "# std deviation of the test oer the splits\n", "test_std = np.std(np.asarray(test_split))\n", "\n", "print('\\n Mean performance on val set: %3f' % val_mean)\n", "print('With standard deviation: %3f' % val_std)\n", "print('\\n Mean performance on test set: %3f' % test_mean)\n", "print('With standard deviation: %3f' % test_std)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'O', 6: 'O'}" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# a = [0, 1, 3, 2]\n", "# b = [3, 2, 1, 0]\n", "# print(1 if a == b else 0)\n", "\n", "# max(1 ,2)\n", "\n", "# x = [ 'r', 'a', 's' ]\n", "# x.sort()\n", "# print(x)\n", "\n", "# def test1(*args, base = 'subtree'):\n", "# if base == 'subtree':\n", "# print('subtree')\n", "# elif base == 'edge':\n", "# print('edge')\n", "# else:\n", "# print('sp')\n", "\n", "# # function parameter usage test\n", "# test1('hello', 'hi', base = 'edge')\n", "\n", "# # python matrix calculation speed test\n", "# import numpy as np\n", "# import time\n", "\n", "# size = 100\n", "# m1 = np.random.random((size, size))\n", "# m2 = np.random.random((size, size))\n", "# itr = 1\n", "\n", "# start_time = time.time()\n", "# for i in range(itr):\n", "# np.dot(m1, m2)\n", "# print(time.time() - start_time)\n", "\n", "# start_time = time.time()\n", "# for j in range(itr):\n", "# result = np.zeros((size, size))\n", "# for i1 in range(size):\n", "# for i2 in range(size):\n", "# for i3 in range(size):\n", "# result[i1][i2] += m1[i1][i3] * m2[i3][i2]\n", "# print(time.time() - start_time)\n", "\n", "# start_time = time.time()\n", "# for i in range(itr):\n", "# print(np.dot(m1, m2))\n", "# print(time.time() - start_time)\n", "\n", "# start_time = time.time()\n", "# for j in range(itr):\n", "# result = np.zeros((size, size))\n", "# for i1 in range(size):\n", "# for i2 in range(size):\n", "# for i3 in range(size):\n", "# result[i1][i2] += m1[i1][i3] * m2[i3][i2]\n", "# print(result)\n", "# print(time.time() - start_time)\n", "\n", "# help(np.sum)\n", "\n", "# test dict\n", "import sys\n", "from collections import Counter\n", "import networkx as nx\n", "sys.path.insert(0, \"../\")\n", "from pygraph.utils.graphfiles import loadDataset\n", "from pygraph.kernels.spkernel import spkernel\n", "\n", "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", "G1 = dataset[15]\n", "nx.get_node_attributes(G1, 'label')\n", "listhqhq = list(nx.get_node_attributes(G1, 'label').values())\n", "dicthaha = dict(Counter(listhqhq))\n", "len(dicthaha)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }