{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Author: Elisabetta Ghisu\n", "\n", "\"\"\"\n", "- Script containing functions for computing the shortest path kernel\n", "- The Floyd Warshall algorithm is first implemented\n", "- Then the SP is calculated\n", "\"\"\"\n", "\n", "\n", "#######################\n", "# - IMPORT PACKAGES - #\n", "#######################\n", "\n", "\n", "\n", "import numpy.matlib as matlib\n", "import numpy as np\n", "\n", "\"\"\"\n", "### FLOYD WARSHALL ALGORITHM\n", "Input:\n", "- Adjancency matrix A\n", "Output:\n", "- Shortest path matrix S\n", "\"\"\"\n", "\n", "def floyd_warshall(A):\n", "\n", "\t# nuber of nodes\n", "\tn = A.shape[0]\n", "\n", "\t# initialize shortes path matrix\n", "\tS = np.zeros(shape = (n,n))\n", "\n", "\tfor i in range(n):\n", "\t\tfor j in range(n):\n", "\t\t\tif A[i,j] == 0 and i!=j:\n", "\t\t\t\tS[i,j] = float(\"inf\")\n", "\t\t\telse:\n", "\t\t\t\tS[i,j] = A[i,j]\n", "\n", "\t# Compute the shortest path matrix\n", "\tfor k in range(n):\n", "\t\tfor i in range(n):\n", "\t\t\tfor j in range(n):\n", "\t\t\t\tif S[i,j] > S[i,k] + S[k,j]:\n", "\t\t\t\t\tS[i,j] = S[i,k] + S[k,j]\n", "\n", "\treturn S\t\t\t\t\t\t\t\t\n", "\n", "\n", "\n", "\"\"\"\n", "SHORTEST PATH KERNEL: This is a fast implementation of the shortest path\n", "kernel algorithm\n", "Inputs\n", "- Adjancency matrix\n", "- List of list of node labels for each graph\n", "- Total number of node labels \n", "Outputs\n", "- Kernel matrix\n", "- Feature matrix\n", "\"\"\"\n", "\n", "def sp_kernel_fast(adj_mat, labels, L):\n", "\n", "\t# Number of graphs\n", "\tn = len(adj_mat)\n", "\tL = int(L)\n", "\tS = []\n", "\n", "\t# shortest path matrices\n", "\tfor i in xrange(n):\n", "\t\tif i%1000 == 0 and i !=0:\n", " \t\t\tprint('haha') #( \"%d\" % i)\n", "\t\tS.append(floyd_warshall(adj_mat[i]))\n", "\t\n", "\t# maximum length of shortest paths in the dataset\n", "\tmax_path = 0\n", "\n", "\t# for each graph in dataset\n", "\tfor i in xrange(n):\n", "\n", "\t\tS_cur = np.copy(S[i])\n", "\t\tS_cur[S_cur == np.inf] = 0\n", "\t\tnew_max = np.max(S_cur)\n", "\t\t\n", "\t\tif new_max > max_path:\n", "\t\t\tmax_path = new_max # get max short path in all Ss\n", "\n", "\t# maximum length of shortest paths\n", "\tmax_path = int(max_path)\n", "\n", "\t# initialize feature matrix\n", "\tsp = np.zeros(((max_path + 1) * L * (L+1) /2,n))\n", "\n", "\t# compute feature map for shortest path\n", "\tfor i in xrange(n):\n", "\n", "\t\tif i % 1000 == 0:\n", "\t\t\tprint('haha') #\"Processed %d graphs\" %i\n", "\n", "\t\tS_graph = S[i]\n", "\t\tlabels_graph = np.asarray(labels[i].reshape((len(labels[i]),1)))\n", "\t\tlabels_graph = labels_graph + 1\n", "\t\t\n", "\t\tlabels_aux = matlib.repmat(labels_graph, 1, len(labels_graph))\n", "\t\t\n", "\t\tmin_lab = np.minimum(labels_aux, labels_aux.T)\n", "\t\t\n", "\t\tmax_lab = np.maximum(labels_aux, labels_aux.T)\n", "\t\tsub_path = np.triu(~(np.isinf(S_graph))).T\n", "\n", "\t\tmin_lab = min_lab[sub_path]\n", "\t\tmax_lab = max_lab[sub_path]\n", "\n", "\n", "\t\tind = S_graph[sub_path] * L * (L + 1) / 2 + (min_lab - 1) * (2*L + 2 - min_lab) / 2 + max_lab - min_lab\n", "\t\tind = ind.astype(int)\n", "\t\taccum = np.zeros((max_path + 1) * L * (L + 1) /2)\n", "\t\taccum[:ind.max() + 1] += np.bincount(ind.astype(int))\n", "\t\tsp[ind,i] = accum[ind]\n", "\t\n", "\tsum_cols = np.sum(sp, axis = 1)\n", "\tind_true = sum_cols != 0\n", "\tsp = sp[ind_true,:]\n", "\t\n", "\t# compute kernel matrix\n", "\tK = np.dot(sp.T,sp)\n", " \n", "\treturn K, sp" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "ename": "ImportError", "evalue": "No module named 'igraph'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;31m# iGraph imports to handle graphs and for graph I/O\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0migraph\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mGraph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mImportError\u001b[0m: No module named 'igraph'" ] } ], "source": [ "#Authors: Elisabetta Ghisu, Felipe Llinares Lopez\n", "\n", "\"\"\"\n", "- This script includes a list of functions for analyzing \n", "parsing and formatting graphs\n", "- The graphs are given in graphml format\n", "- It also cntans functions for loading, processing the graphs\n", "and extract graph statistics\n", "\"\"\"\n", "\n", "\n", "import numpy as np\n", "from numpy import genfromtxt\n", "\n", "# iGraph imports to handle graphs and for graph I/O\n", "from igraph import Graph\n", "\n", "\n", "# ---------------------------------GRAPHML I/O FUNCTIONS------------------------------------ #\n", "\n", "# INPUT:\n", "# filenames_graphs: list of GraphML files, where each file contains one graph in the dataset\n", "# filename_labels: text file with labels corresponding to each graph in the dataset, in the same order as they are in\n", "# filename_graphs\n", "# OUTPUT:\n", "# G: A list containing one iGraph object for each graph in the dataset\n", "# Y: A Numpy array containing the labels corresponding to each graph, in the same order as G\n", "def load_graphml(filenames_graphs, filename_labels):\n", " G = []\n", " for fname in filenames_graphs:\n", " G.append(Graph.Read_GraphML(fname))\n", " Y = genfromtxt(filename_labels)\n", " return (G, Y)\n", "\n", "\n", "# Loads a list of paths to GraphML files from filename_list\n", "def load_file_list(filename_flist):\n", " f = open(filename_flist, 'r')\n", " f_graphs = []\n", " for line in f:\n", " f_graphs.append(line.strip())\n", " f.close()\n", " return f_graphs\n", "\n", "\n", "# --------------------------------COMPUTE STATISTICS---------------------------------------- #\n", "\n", "\n", "# Retrieve labels of all vertices belonging to any graph in the list of iGraph objects G and\n", "# returns the entire list, and a list with the alphabet of the vertex labels\n", "def get_all_vertex_labels(G, att_name='label'):\n", " v_l = []\n", " for g in G:\n", " v_l += g.vs[att_name]\n", " return (v_l, np.unique(v_l))\n", "\n", "\n", "# Retrieve labels of all edges belonging to any graph in the list of iGraph objects G and\n", "# returns the entire list, and a list with the alphabet of the edge labels\n", "def get_all_edge_labels(G, att_name='label'):\n", " e_l = []\n", " for g in G:\n", " e_l += g.es[att_name]\n", " return (e_l, np.unique(e_l))\n", "\n", "\n", "# Returns a list where each element is itself the adjacency list of the corresponding graph\n", "# The adjacency lit of a graph has the following format:\n", "# it is a list where each element is a list containing the id of adjacent nodes\n", "def get_adj_list(G):\n", " ad_l = []\n", " for g in G:\n", " ad_l.append(g.get_adjlist())\n", " return ad_l\n", "\n", "# Returns a list where each element is the adjacency matrix of the graph \n", "# The adjancency matrix is in iGraph format\n", "def get_adj_mat(G):\n", " ad_m = []\n", " for g in G:\n", " ad_m.append(g.get_adjacency())\n", " return ad_m\n", "\n", "# Returns a list where each element contains the nodes label for a graph\n", "def get_node_labels(G, att_name = 'label'):\n", " node_l = []\n", " for g in G:\n", " node_l.append(g.vs[att_name])\n", " return node_l\n", "\n", "\n", "\n", "# ----------------- LOAD AND PROCESS THE GRAPHS --------------- #\n", "\n", "\n", "\"\"\"\n", "Inputs:\n", "- list of graphs file\n", "- labels file\n", "- path to the data folder\n", "Outputs:\n", "- List of node labels\n", "- List of adjancency lists\n", "- List of graphs in graphml format\n", "- Targets\n", "- number of classes\n", "- sample size\n", "\"\"\"\n", "\n", "\n", "def load_and_process(filenames_graphs, filename_labels, path_to_dataset):\n", "\n", " # load a list of names to graphml files\n", " f_graphs = load_file_list(filenames_graphs)\n", " # sample size\n", " n = len(f_graphs)\n", "\n", " # create a list of paths to the files\n", " f_graphs_path =[]\n", "\n", " # for each graph in dataset\n", " for i in range(n):\n", "\n", " # index the graph\n", " graph_name = f_graphs[i]\n", "\n", " # path to the data folder\n", " path = \"%s/%s\" % (path_to_dataset, graph_name)\n", " f_graphs_path.append(path)\n", "\n", " # If the data is DD have to delete an element (corrupted file)\n", " if graph_name == \"DD\":\n", " del f_graphs_path[148]\n", " n = n-1\n", "\n", " # Load the graphs in graphml format\n", " # G is a llist of graphml graph\n", " # Y is an array of targets\n", " G,Y = load_graphml(f_graphs_path, filename_labels)\n", "\n", " # Delete corrupted file in DD\n", " if graph_name == \"DD\": \n", " Y = np.delete(Y, 148)\n", "\n", " # get adjacency list and matrix for all the graphs in G\n", " ad_list = get_adj_list(G)\n", " ad_mat = get_adj_mat(G)\n", "\n", " # get a list containing lists of node labels\n", " node_label = get_node_labels(G)\n", "\n", " return node_label, ad_list, G, Y\n", "\n", "\n", "\n", "\"\"\"\n", "RENAME NODES: function to rename nodes from 0,...,num_nodes\n", "Input\n", "- list of list of node labels in each graph\n", "Output\n", "- L: total number of different labels in the dataset\n", "- node_label: new renamed labels\n", "\"\"\"\n", "\n", "def rename_nodes(node_label): \n", " \n", " # number of graphs in the dataset\n", " n = len(node_label)\n", "\n", " # labels will store the new labels\n", " labels = [0] * n\n", "\n", " # disctionary containing the map from the old to the new labels\n", " label_lookup = {}\n", "\n", " # counter of unique labels\n", " label_counter = 0\n", "\n", " # for each graph in dataset\n", " for i in range(n):\n", "\n", "\n", " # number of nodes in graph[i]\n", " num_nodes = len(node_label[i]) \n", "\n", " # will be used to store the new labels\n", " labels[i] = np.zeros(num_nodes, dtype = np.uint64) # positive integers\n", "\n", " # for each node in the graph\n", " for j in range(num_nodes):\n", "\n", " # the node label to a string\n", " l_node_str = str(np.copy(node_label[i][j]))\n", " \n", " # if the string has not been observed yet\n", " # the corresponding node is assigned a new label\n", " # otherwise it will be named with the same label\n", " # already assigned to an identical string\n", "\n", " if not label_lookup.has_key(l_node_str):\n", " label_lookup[l_node_str] = label_counter\n", " labels[i][j] = label_counter \n", " label_counter += 1\n", " else:\n", " labels[i][j] = label_lookup[l_node_str]\n", "\n", " # total number of labels in the dataset\n", " L = label_counter\n", " print('haha') #'Number of original labels %d' % L \n", "\n", " return L, labels" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "usage: ipykernel_launcher.py [-h] --dataset DATASET\n", "ipykernel_launcher.py: error: the following arguments are required: --dataset\n" ] }, { "ename": "SystemExit", "evalue": "2", "output_type": "error", "traceback": [ "An exception has occurred, use %tb to see the full traceback.\n", "\u001b[0;31mSystemExit\u001b[0m\u001b[0;31m:\u001b[0m 2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py:2918: UserWarning: To exit: use 'exit', 'quit', or Ctrl-D.\n", " warn(\"To exit: use 'exit', 'quit', or Ctrl-D.\", stacklevel=1)\n" ] } ], "source": [ "# Author: Elisabetta Ghisu\n", "\n", "\"\"\"\n", "- Script for computing the kernel matrix and features map \n", "using shortest path kernel\n", "\"\"\"\n", "\n", "###########################\n", "# --- IMPORT PACKAGES --- #\n", "###########################\n", "\n", "import numpy as np\n", "import argparse\n", "import os\n", "import pickle\n", "\n", "from numpy import genfromtxt\n", "\n", "# from sp_functions import *\n", "# from parse_graphs import *\n", "\n", "\n", "\n", "##############################\n", "### Command Line Arguments ###\n", "##############################\n", "\n", "parser = argparse.ArgumentParser(description = \"Compute kernel and features matrices via shortest path kernel\")\n", "parser.add_argument(\"--dataset\", required = True, help = \"Name of the dataset\")\n", "args = parser.parse_args()\n", "\n", "\n", "#####################\n", "### LOAD THE DATA ###\n", "#####################\n", "\n", "\"\"\"\n", "- Here we load the data input and targets\n", "- The data are assumed to be in graph formats\n", "- They should be in graphml format \n", "\"\"\"\n", "\n", "# path to the list of graphs and dataset\n", "filenames_graphs = \"data/%s.list\" % (args.dataset)\n", "path_to_dataset = \"data/%s\" % (args.dataset) \n", "\n", "# Load the targets\n", "filename_labels = \"data/%s_label.txt\" % (args.dataset)\n", "\n", "# load and process graphs\n", "node_label, ad_list, G, Y = load_and_process(filenames_graphs, filename_labels, path_to_dataset)\n", "\n", "# output directory\n", "out_path = \"kernel_matrices/%s/sp\" % args.dataset\n", "\n", "# If the output directory does not exist, then create it\n", "if not os.path.exists(out_path):\n", " os.makedirs(out_path)\n", "\n", "\n", "#########################\n", "# --- SHORTEST PATH --- #\n", "#########################\n", "\n", "\n", "# assign labels starting from zero to the nodes\n", "L, labels = rename_nodes(node_label)\n", "\n", "\n", "# Compute adjancency matrix \n", "adj_mat = get_adj_mat(G)\n", "\n", "# Compute kernel and feature maps using shortest path\n", "K, phi = sp_kernel_fast(adj_mat, labels, L)\n", "\n", "# save kernel matrix\n", "file_name = \"%s/%s_ker_mat\" % (out_path, args.dataset)\n", "np.save(file_name, K)\n", "\n", "# save feature map\n", "file_name = \"%s/%s_phi_map\" % (out_path, args.dataset)\n", "np.save(file_name, phi)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 0. 2. 3. 1. 2.]]\n", "{0: {0: [0], 1: [0, 3, 1], 2: [0, 3, 4, 2], 3: [0, 3], 4: [0, 3, 4]}, 1: {0: [1, 3, 0], 1: [1], 2: [1, 3, 4, 2], 3: [1, 3], 4: [1, 3, 4]}, 2: {0: [2, 4, 3, 0], 1: [2, 4, 3, 1], 2: [2], 3: [2, 4, 3], 4: [2, 4]}, 3: {0: [3, 0], 1: [3, 1], 2: [3, 4, 2], 3: [3], 4: [3, 4]}, 4: {0: [4, 3, 0], 1: [4, 3, 1], 2: [4, 2], 3: [4, 3], 4: [4]}}\n", "[[ 0. 2. 3. 1. 2.]\n", " [ 2. 0. 3. 1. 2.]\n", " [ 3. 3. 0. 2. 1.]\n", " [ 1. 1. 2. 0. 1.]\n", " [ 2. 2. 1. 1. 0.]]\n" ] }, { "ename": "NameError", "evalue": "name 'plt' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0mS\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetSPGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNameError\u001b[0m: name 'plt' is not defined" ] } ], "source": [ "dataset, y = loadDataset(\"/home/ljia/Documents/research-repo/datasets/acyclic/Acyclic/dataset_bps.ds\")\n", "G1 = dataset[12]\n", "\n", "nx.draw_networkx(G1)\n", "# print(list(dataset[12][4]))\n", "\n", "l = nx.shortest_path(G1)\n", "\n", "l2 = nx.floyd_warshall_numpy(G1)\n", "print(np.array(l2[0]))\n", "print(l)\n", "print(l2)\n", "plt.show()\n", "\n", "S = getSPGraph(G1)\n", "nx.draw_networkx(S)\n", "pos = nx.spring_layout(S)\n", "edge_labels = nx.get_edge_attributes(S,'cost')\n", "print(edge_labels)\n", "nx.draw_networkx_edge_labels(S, pos, edge_labels = edge_labels)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "import networkx as nx\n", " \n", "def loadCT(filename):\n", " \"\"\"load data from .ct file.\n", " \n", " Notes\n", " ------ \n", " a typical example of data in .ct is like this:\n", " \n", " 3 2 <- number of nodes and edges\n", " 0.0000 0.0000 0.0000 C <- each line describes a node, the last parameter in which is the label of the node, representing a chemical element @Q what are the first 3 numbers?\n", " 0.0000 0.0000 0.0000 C\n", " 0.0000 0.0000 0.0000 O\n", " 1 3 1 1 <- each line describes an edge, the first two numbers represent two nodes of the edge, the last number represents the label. @Q what are the 3th numbers?\n", " 2 3 1 1\n", " \"\"\"\n", " content = open(filename).read().splitlines()\n", " G = nx.Graph(name=str(content[0])) # set name of the graph\n", " tmp = content[1].split(\" \")\n", " if tmp[0] == '':\n", " nb_nodes = int(tmp[1]) # number of the nodes\n", " nb_edges = int(tmp[2]) # number of the edges\n", " else:\n", " nb_nodes = int(tmp[0])\n", " nb_edges = int(tmp[1])\n", "\n", " for i in range(0, nb_nodes):\n", " tmp = content[i + 2].split(\" \")\n", " tmp = [x for x in tmp if x != '']\n", " G.add_node(i, label=tmp[3])\n", "\n", " for i in range(0, nb_edges):\n", " tmp = content[i + G.number_of_nodes() + 2].split(\" \")\n", " tmp = [x for x in tmp if x != '']\n", " G.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1, label=int(tmp[3]))\n", " return G\n", "\n", "\n", "def loadGXL(filename):\n", " import networkx as nx\n", " import xml.etree.ElementTree as ET\n", "\n", " tree = ET.parse(filename)\n", " root = tree.getroot()\n", " index = 0\n", " G = nx.Graph()\n", " dic={}\n", " for node in root.iter('node'):\n", " label = node.find('attr')[0].text\n", " dic[node.attrib['id']] = index\n", " G.add_node(index, id=node.attrib['id'], label=label)\n", " index += 1\n", " \n", " for edge in root.iter('edge'):\n", " label = edge.find('attr')[0].text\n", " G.add_edge(dic[edge.attrib['from']], dic[edge.attrib['to']], label=label)\n", " return G\n", " \n", "def loadDataset(filename):\n", " \"\"\"load file list of the dataset.\n", " \"\"\"\n", " from os.path import dirname, splitext\n", "\n", " dirname_dataset = dirname(filename)\n", " extension = splitext(filename)[1][1:]\n", " data = []\n", " y = []\n", " if(extension == \"ds\"):\n", " content = open(filename).read().splitlines()\n", " for i in range(0, len(content)):\n", " tmp = content[i].split(' ')\n", " data.append(loadCT(dirname_dataset + '/' + tmp[0].replace('#', '', 1))) # remove the '#'s in file names\n", " y.append(float(tmp[1]))\n", " elif(extension == \"cxl\"):\n", " import xml.etree.ElementTree as ET\n", "\n", " tree = ET.parse(filename)\n", " root = tree.getroot()\n", " data = []\n", " y = []\n", " for graph in root.iter('print'):\n", " mol_filename = graph.attrib['file']\n", " mol_class = graph.attrib['class']\n", " data.append(loadGXL(dirname_dataset + '/' + mol_filename))\n", " y.append(mol_class)\n", "\n", " return data, y" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [ { "ename": "SyntaxError", "evalue": "invalid syntax (, line 48)", "output_type": "error", "traceback": [ "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m48\u001b[0m\n\u001b[0;31m Kmatrix[j][i] += (i == j ? 0 : 1)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" ] } ], "source": [ "import sys\n", "import pathlib\n", "sys.path.insert(0, \"../\")\n", "\n", "\n", "import networkx as nx\n", "import numpy as np\n", "import time\n", "\n", "from utils.utils import getSPGraph\n", "\n", "\n", "def spkernel(Gn):\n", " \"\"\"Transform graph G to its corresponding shortest-paths graph using Floyd-transformation.\n", " \n", " Parameters\n", " ----------\n", " G : NetworkX graph\n", " The graph to be tramsformed.\n", " \n", " Return\n", " ------\n", " S : NetworkX graph\n", " The shortest-paths graph corresponding to G.\n", " \n", " References\n", " ----------\n", " [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.\n", " \"\"\"\n", " Kmatrix = np.zeros((len(Gn), len(Gn)))\n", " \n", " Sn = [] # get shortest path graphs of Gn\n", " for i in range(0, len(Gn)):\n", " Sn.append(getSPGraph(Gn[i]))\n", " \n", "# print(S1.nodes(data = True))\n", "# print(S2.nodes(data = True))\n", "# print(S1.edges(data = True))\n", "# print(S2.edges(data = True))\n", " \n", " start_time = time.time()\n", " for i in range(0, len(Gn)):\n", " for j in range(i, len(Gn)):\n", " for e1 in Sn[i].edges(data = True):\n", " for e2 in Sn[j].edges(data = True): \n", " if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):\n", " Kmatrix[i][j] += 1\n", " Kmatrix[j][i] += (i == j ? 0 : 1)\n", " \n", " print(\"--- %s seconds ---\" % (time.time() - start_time))\n", " \n", " return Kmatrix" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "--- 0.05678129196166992 seconds ---\n", "1\n", "--- 0.15176129341125488 seconds ---\n", "2\n", "--- 0.20930719375610352 seconds ---\n", "3\n", "--- 0.3049781322479248 seconds ---\n", "4\n", "--- 0.4029049873352051 seconds ---\n", "5\n", "--- 0.5458371639251709 seconds ---\n", "6\n", "--- 0.6920650005340576 seconds ---\n", "7\n", "--- 0.7972092628479004 seconds ---\n", "8\n", "--- 0.947425365447998 seconds ---\n", "9\n", "--- 1.1016933917999268 seconds ---\n", "10\n", "--- 1.2554333209991455 seconds ---\n", "11\n", "--- 1.4140815734863281 seconds ---\n", "12\n", "--- 1.562861442565918 seconds ---\n", "13\n", "--- 1.7876057624816895 seconds ---\n", "14\n", "--- 1.9889881610870361 seconds ---\n", "15\n", "--- 2.2633984088897705 seconds ---\n", "16\n", "--- 2.480710983276367 seconds ---\n", "17\n", "--- 2.683915138244629 seconds ---\n", "18\n", "--- 2.8276052474975586 seconds ---\n", "19\n", "--- 2.972059488296509 seconds ---\n", "20\n", "--- 3.11892032623291 seconds ---\n", "21\n", "--- 3.330472469329834 seconds ---\n", "22\n", "--- 3.5461206436157227 seconds ---\n", "23\n", "--- 3.7521393299102783 seconds ---\n", "24\n", "--- 3.956348180770874 seconds ---\n", "25\n", "--- 4.162136793136597 seconds ---\n", "26\n", "--- 4.365236759185791 seconds ---\n", "27\n", "--- 4.572294473648071 seconds ---\n", "28\n", "--- 4.778241872787476 seconds ---\n", "29\n", "--- 4.981487035751343 seconds ---\n", "30\n", "--- 5.189010143280029 seconds ---\n", "31\n", "--- 5.466430902481079 seconds ---\n", "32\n", "--- 5.73804497718811 seconds ---\n", "33\n", "--- 6.0193397998809814 seconds ---\n", "34\n", "--- 6.293334245681763 seconds ---\n", "35\n", "--- 6.569210767745972 seconds ---\n", "36\n", "--- 6.783808708190918 seconds ---\n", "37\n", "--- 6.999167203903198 seconds ---\n", "38\n", "--- 7.209052085876465 seconds ---\n", "39\n", "--- 7.414280652999878 seconds ---\n", "40\n", "--- 7.620949983596802 seconds ---\n", "41\n", "--- 7.892791986465454 seconds ---\n", "42\n", "--- 8.166114330291748 seconds ---\n", "43\n", "--- 8.46480393409729 seconds ---\n", "44\n", "--- 8.75532841682434 seconds ---\n", "45\n", "--- 9.027160882949829 seconds ---\n", "46\n", "--- 9.303063869476318 seconds ---\n", "47\n", "--- 9.575549125671387 seconds ---\n", "48\n", "--- 9.867429733276367 seconds ---\n", "49\n", "--- 10.160123109817505 seconds ---\n", "50\n", "--- 10.437638759613037 seconds ---\n", "51\n", "--- 10.714671611785889 seconds ---\n", "52\n", "--- 10.987818479537964 seconds ---\n", "53\n", "--- 11.259410381317139 seconds ---\n", "54\n", "--- 11.535178184509277 seconds ---\n", "55\n", "--- 11.807695865631104 seconds ---\n", "56\n", "--- 12.158225774765015 seconds ---\n", "57\n", "--- 12.506253004074097 seconds ---\n", "58\n", "--- 12.856064319610596 seconds ---\n", "59\n", "--- 13.203948497772217 seconds ---\n", "60\n", "--- 13.552793741226196 seconds ---\n", "61\n", "--- 13.906684160232544 seconds ---\n", "62\n", "--- 14.256698369979858 seconds ---\n", "63\n", "--- 14.606950283050537 seconds ---\n", "64\n", "--- 14.876070022583008 seconds ---\n", "65\n", "--- 15.148754596710205 seconds ---\n", "66\n", "--- 15.43168306350708 seconds ---\n", "67\n", "--- 15.710469961166382 seconds ---\n", "68\n", "--- 15.98047399520874 seconds ---\n", "69\n", "--- 16.25121569633484 seconds ---\n", "70\n", "--- 16.52086853981018 seconds ---\n", "71\n", "--- 16.790047645568848 seconds ---\n", "72\n", "--- 17.06355619430542 seconds ---\n", "73\n", "--- 17.335728406906128 seconds ---\n", "74\n", "--- 17.607405424118042 seconds ---\n", "75\n", "--- 17.955402135849 seconds ---\n", "76\n", "--- 18.303555488586426 seconds ---\n", "77\n", "--- 18.654282808303833 seconds ---\n", "78\n", "--- 19.004570245742798 seconds ---\n", "79\n", "--- 19.35291624069214 seconds ---\n", "80\n", "--- 19.700473070144653 seconds ---\n", "81\n", "--- 20.04847502708435 seconds ---\n", "82\n", "--- 20.39787983894348 seconds ---\n", "83\n", "--- 20.74629044532776 seconds ---\n", "84\n", "--- 21.094562768936157 seconds ---\n", "85\n", "--- 21.445199489593506 seconds ---\n", "86\n", "--- 21.794403791427612 seconds ---\n", "87\n", "--- 22.143365383148193 seconds ---\n", "88\n", "--- 22.49206042289734 seconds ---\n", "89\n", "--- 22.840426445007324 seconds ---\n", "90\n", "--- 23.189460515975952 seconds ---\n", "91\n", "--- 23.539386749267578 seconds ---\n", "92\n", "--- 23.888701677322388 seconds ---\n", "93\n", "--- 24.23668909072876 seconds ---\n", "94\n", "--- 24.58505630493164 seconds ---\n", "95\n", "--- 25.019609451293945 seconds ---\n", "96\n", "--- 25.456527709960938 seconds ---\n", "97\n", "--- 25.891918182373047 seconds ---\n", "98\n", "--- 26.32820987701416 seconds ---\n", "99\n", "--- 26.76149344444275 seconds ---\n", "100\n", "--- 27.197012424468994 seconds ---\n", "101\n", "--- 27.63314127922058 seconds ---\n", "102\n", "--- 28.068315029144287 seconds ---\n", "103\n", "--- 28.50419807434082 seconds ---\n", "104\n", "--- 28.852453231811523 seconds ---\n", "105\n", "--- 29.205727338790894 seconds ---\n", "106\n", "--- 29.554840087890625 seconds ---\n", "107\n", "--- 29.90355086326599 seconds ---\n", "108\n", "--- 30.251071214675903 seconds ---\n", "109\n", "--- 30.599868059158325 seconds ---\n", "110\n", "--- 30.94942593574524 seconds ---\n", "111\n", "--- 31.298285245895386 seconds ---\n", "112\n", "--- 31.648550271987915 seconds ---\n", "113\n", "--- 32.0825355052948 seconds ---\n", "114\n", "--- 32.516993045806885 seconds ---\n", "115\n", "--- 32.950743198394775 seconds ---\n", "116\n", "--- 33.38488531112671 seconds ---\n", "117\n", "--- 33.81857705116272 seconds ---\n", "118\n", "--- 34.27995991706848 seconds ---\n", "119\n", "--- 34.728654623031616 seconds ---\n", "120\n", "--- 35.16262221336365 seconds ---\n", "121\n", "--- 35.5960898399353 seconds ---\n", "122\n", "--- 36.02964925765991 seconds ---\n", "123\n", "--- 36.46674466133118 seconds ---\n", "124\n", "--- 36.91917443275452 seconds ---\n", "125\n", "--- 37.381704330444336 seconds ---\n", "126\n", "--- 37.81864261627197 seconds ---\n", "127\n", "--- 38.3528311252594 seconds ---\n", "128\n", "--- 38.89131188392639 seconds ---\n", "129\n", "--- 39.42161011695862 seconds ---\n", "130\n", "--- 39.95006561279297 seconds ---\n", "131\n", "--- 40.476089000701904 seconds ---\n", "132\n", "--- 41.00121235847473 seconds ---\n", "133\n", "--- 41.4318163394928 seconds ---\n", "134\n", "--- 41.86459708213806 seconds ---\n", "135\n", "--- 42.29518222808838 seconds ---\n", "136\n", "--- 42.729474782943726 seconds ---\n", "137\n", "--- 43.16999864578247 seconds ---\n", "138\n", "--- 43.606104135513306 seconds ---\n", "139\n", "--- 44.04209113121033 seconds ---\n", "140\n", "--- 44.4772834777832 seconds ---\n", "141\n", "--- 45.01142644882202 seconds ---\n", "142\n", "--- 45.543590784072876 seconds ---\n", "143\n", "--- 46.07910680770874 seconds ---\n", "144\n", "--- 46.612366914749146 seconds ---\n", "145\n", "--- 47.1452751159668 seconds ---\n", "146\n", "--- 47.67322564125061 seconds ---\n", "147\n", "--- 48.20156168937683 seconds ---\n", "148\n", "--- 48.73471546173096 seconds ---\n", "149\n", "--- 49.2733519077301 seconds ---\n", "150\n", "--- 49.806400537490845 seconds ---\n", "151\n", "--- 50.33490014076233 seconds ---\n", "152\n", "--- 50.86489534378052 seconds ---\n", "153\n", "--- 51.39602565765381 seconds ---\n", "154\n", "--- 51.93729043006897 seconds ---\n", "155\n", "--- 52.473469972610474 seconds ---\n", "156\n", "--- 53.01401090621948 seconds ---\n", "157\n", "--- 53.58053278923035 seconds ---\n", "158\n", "--- 54.22534370422363 seconds ---\n", "159\n", "--- 54.870089292526245 seconds ---\n", "160\n", "--- 55.50953507423401 seconds ---\n", "161\n", "--- 56.144059896469116 seconds ---\n", "162\n", "--- 56.779675245285034 seconds ---\n", "163\n", "--- 57.41550326347351 seconds ---\n", "164\n", "--- 58.04742622375488 seconds ---\n", "165\n", "--- 58.57527136802673 seconds ---\n", "166\n", "--- 59.10521101951599 seconds ---\n", "167\n", "--- 59.737877368927 seconds ---\n", "168\n", "--- 60.373518228530884 seconds ---\n", "169\n", "--- 61.00429916381836 seconds ---\n", "170\n", "--- 61.64198398590088 seconds ---\n", "171\n", "--- 62.27683877944946 seconds ---\n", "172\n", "--- 62.91516971588135 seconds ---\n", "173\n", "--- 63.26179265975952 seconds ---\n", "174\n", "--- 63.89728498458862 seconds ---\n", "175\n", "--- 64.53867197036743 seconds ---\n", "176\n", "--- 65.18402314186096 seconds ---\n", "177\n", "--- 65.82770419120789 seconds ---\n", "178\n", "--- 66.46093964576721 seconds ---\n", "179\n", "--- 67.09133172035217 seconds ---\n", "180\n", "--- 67.73470735549927 seconds ---\n", "181\n", "--- 68.4084141254425 seconds ---\n", "182\n", "--- 69.05118441581726 seconds ---\n", "183\n", "--- 69.68487596511841 seconds ---\n", "184\n", "--- 70.3204357624054 seconds ---\n", "[[ 3. 1. 3. ..., 1. 1. 1.]\n", " [ 1. 6. 1. ..., 0. 0. 3.]\n", " [ 3. 1. 3. ..., 1. 1. 1.]\n", " ..., \n", " [ 1. 0. 1. ..., 55. 21. 7.]\n", " [ 1. 0. 1. ..., 21. 55. 7.]\n", " [ 1. 3. 1. ..., 7. 7. 55.]]\n" ] } ], "source": [ "dataset, y = loadDataset(\"/home/ljia/Documents/research-repo/datasets/acyclic/Acyclic/dataset_bps.ds\")\n", "G1 = dataset[12]\n", "G2 = dataset[20]\n", "Kmatrix = spkernel(dataset)\n", "\n", "print(Kmatrix)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }