From 5026caf5ad6bee450e8731df0a32dfebc2cea00d Mon Sep 17 00:00:00 2001
From: linlin
Date: Mon, 5 Oct 2020 16:35:36 +0200
Subject: [PATCH] New translations marginalized.py (French)

---
 lang/fr/gklearn/kernels/marginalized.py | 338 ++++++++++++++++++++++++++++++++
 1 file changed, 338 insertions(+)
 create mode 100644 lang/fr/gklearn/kernels/marginalized.py

diff --git a/lang/fr/gklearn/kernels/marginalized.py b/lang/fr/gklearn/kernels/marginalized.py
new file mode 100644
index 0000000..6910468
--- /dev/null
+++ b/lang/fr/gklearn/kernels/marginalized.py
@@ -0,0 +1,338 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Jun 3 22:22:57 2020
+
+@author: ljia
+
+@references:
+
+    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between
+    labeled graphs. In Proceedings of the 20th International Conference on
+    Machine Learning, Washington, DC, United States, 2003.
+
+    [2] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and
+    Jean-Philippe Vert. Extensions of marginalized graph kernels. In
+    Proceedings of the twenty-first international conference on Machine
+    learning, page 70. ACM, 2004.
+"""
+
+import sys
+from multiprocessing import Pool
+from tqdm import tqdm
+import numpy as np
+import networkx as nx
+from gklearn.utils import SpecialLabel
+from gklearn.utils.kernels import deltakernel
+from gklearn.utils.parallel import parallel_gm, parallel_me
+from gklearn.utils.utils import untotterTransformation
+from gklearn.kernels import GraphKernel
+
+
+class Marginalized(GraphKernel):
+
+    def __init__(self, **kwargs):
+        GraphKernel.__init__(self)
+        self.__node_labels = kwargs.get('node_labels', [])
+        self.__edge_labels = kwargs.get('edge_labels', [])
+        self.__p_quit = kwargs.get('p_quit', 0.5)
+        self.__n_iteration = kwargs.get('n_iteration', 10)
+        self.__remove_totters = kwargs.get('remove_totters', False)
+        self.__ds_infos = kwargs.get('ds_infos', {})
+        self.__n_iteration = int(self.__n_iteration)
+
+
+    def _compute_gm_series(self):
+        self.__add_dummy_labels(self._graphs)
+
+        if self.__remove_totters:
+            if self._verbose >= 2:
+                iterator = tqdm(self._graphs, desc='removing tottering', file=sys.stdout)
+            else:
+                iterator = self._graphs
+            # @todo: this may not work.
+            self._graphs = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
+
+        # compute Gram matrix.
+        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
+
+        from itertools import combinations_with_replacement
+        itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
+        if self._verbose >= 2:
+            iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
+        else:
+            iterator = itr
+        for i, j in iterator:
+            kernel = self.__kernel_do(self._graphs[i], self._graphs[j])
+            gram_matrix[i][j] = kernel
+            gram_matrix[j][i] = kernel  # @todo: no directed graph considered?
+
+        return gram_matrix
+
+
+    def _compute_gm_imap_unordered(self):
+        self.__add_dummy_labels(self._graphs)
+
+        if self.__remove_totters:
+            pool = Pool(self._n_jobs)
+            itr = range(0, len(self._graphs))
+            if len(self._graphs) < 100 * self._n_jobs:
+                chunksize = int(len(self._graphs) / self._n_jobs) + 1
+            else:
+                chunksize = 100
+            remove_fun = self._wrapper_untotter
+            if self._verbose >= 2:
+                iterator = tqdm(pool.imap_unordered(remove_fun, itr, chunksize),
+                                desc='removing tottering', file=sys.stdout)
+            else:
+                iterator = pool.imap_unordered(remove_fun, itr, chunksize)
+            for i, g in iterator:
+                self._graphs[i] = g
+            pool.close()
+            pool.join()
+
+        # compute Gram matrix.
+        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
+
+        def init_worker(gn_toshare):
+            global G_gn
+            G_gn = gn_toshare
+        do_fun = self._wrapper_kernel_do
+        parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
+                    glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
+
+        return gram_matrix
+
+
+    def _compute_kernel_list_series(self, g1, g_list):
+        self.__add_dummy_labels(g_list + [g1])
+
+        if self.__remove_totters:
+            g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+            if self._verbose >= 2:
+                iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout)
+            else:
+                iterator = g_list
+            # @todo: this may not work.
+            g_list = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
+
+        # compute kernel list.
+        kernel_list = [None] * len(g_list)
+        if self._verbose >= 2:
+            iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
+        else:
+            iterator = range(len(g_list))
+        for i in iterator:
+            kernel = self.__kernel_do(g1, g_list[i])
+            kernel_list[i] = kernel
+
+        return kernel_list
+
+
+    def _compute_kernel_list_imap_unordered(self, g1, g_list):
+        self.__add_dummy_labels(g_list + [g1])
+
+        if self.__remove_totters:
+            g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+            pool = Pool(self._n_jobs)
+            itr = range(0, len(g_list))
+            if len(g_list) < 100 * self._n_jobs:
+                chunksize = int(len(g_list) / self._n_jobs) + 1
+            else:
+                chunksize = 100
+            remove_fun = self._wrapper_untotter
+            if self._verbose >= 2:
+                iterator = tqdm(pool.imap_unordered(remove_fun, itr, chunksize),
+                                desc='removing tottering', file=sys.stdout)
+            else:
+                iterator = pool.imap_unordered(remove_fun, itr, chunksize)
+            for i, g in iterator:
+                g_list[i] = g
+            pool.close()
+            pool.join()
+
+        # compute kernel list.
+        kernel_list = [None] * len(g_list)
+
+        def init_worker(g1_toshare, g_list_toshare):
+            global G_g1, G_g_list
+            G_g1 = g1_toshare
+            G_g_list = g_list_toshare
+        do_fun = self._wrapper_kernel_list_do
+        def func_assign(result, var_to_assign):
+            var_to_assign[result[0]] = result[1]
+        itr = range(len(g_list))
+        len_itr = len(g_list)
+        parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
+                    init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
+                    n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
+
+        return kernel_list
+
+
+    def _wrapper_kernel_list_do(self, itr):
+        return itr, self.__kernel_do(G_g1, G_g_list[itr])
+
+
+    def _compute_single_kernel_series(self, g1, g2):
+        self.__add_dummy_labels([g1] + [g2])
+        if self.__remove_totters:
+            g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+            g2 = untotterTransformation(g2, self.__node_labels, self.__edge_labels)
+        kernel = self.__kernel_do(g1, g2)
+        return kernel
+
+
+    def __kernel_do(self, g1, g2):
+        """Compute the marginalized graph kernel between two graphs.
+
+        Parameters
+        ----------
+        g1, g2 : NetworkX graphs
+            The two graphs between which the kernel is computed.
+
+        Returns
+        -------
+        kernel : float
+            Marginalized kernel between the two graphs.
+        """
+        # init parameters
+        kernel = 0
+        num_nodes_G1 = nx.number_of_nodes(g1)
+        num_nodes_G2 = nx.number_of_nodes(g2)
+        # the initial probability distribution in the random walks generating step
+        # (uniform distribution over |G|)
+        p_init_G1 = 1 / num_nodes_G1
+        p_init_G2 = 1 / num_nodes_G2
+
+        q = self.__p_quit * self.__p_quit
+        r1 = q
+
+#        # initial R_inf
+#        # matrix to save all the R_inf for all pairs of nodes
+#        R_inf = np.zeros([num_nodes_G1, num_nodes_G2])
+#
+#        # calculate R_inf with a simple iterative method
+#        for i in range(1, n_iteration):
+#            R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
+#            R_inf_new.fill(r1)
+#
+#            # calculate R_inf for each pair of nodes
+#            for node1 in g1.nodes(data=True):
+#                neighbor_n1 = g1[node1[0]]
+#                # the transition probability distribution in the random walks
+#                # generating step (uniform distribution over the vertices adjacent
+#                # to the current vertex)
+#                if len(neighbor_n1) > 0:
+#                    p_trans_n1 = (1 - p_quit) / len(neighbor_n1)
+#                    for node2 in g2.nodes(data=True):
+#                        neighbor_n2 = g2[node2[0]]
+#                        if len(neighbor_n2) > 0:
+#                            p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
+#
+#                            for neighbor1 in neighbor_n1:
+#                                for neighbor2 in neighbor_n2:
+#                                    t = p_trans_n1 * p_trans_n2 * \
+#                                        deltakernel(g1.node[neighbor1][node_label],
+#                                                    g2.node[neighbor2][node_label]) * \
+#                                        deltakernel(
+#                                            neighbor_n1[neighbor1][edge_label],
+#                                            neighbor_n2[neighbor2][edge_label])
+#
+#                                    R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][
+#                                        neighbor2]  # ref [1] equation (8)
+#            R_inf[:] = R_inf_new
+#
+#        # add elements of R_inf up and calculate kernel
+#        for node1 in g1.nodes(data=True):
+#            for node2 in g2.nodes(data=True):
+#                s = p_init_G1 * p_init_G2 * deltakernel(
+#                    node1[1][node_label], node2[1][node_label])
+#                kernel += s * R_inf[node1[0]][node2[0]]  # ref [1] equation (6)
+
+
+        R_inf = {}  # dict to save all the R_inf for all pairs of nodes
+        # initial R_inf, the 1st iteration.
+        for node1 in g1.nodes():
+            for node2 in g2.nodes():
+#                R_inf[(node1[0], node2[0])] = r1
+                if len(g1[node1]) > 0:
+                    if len(g2[node2]) > 0:
+                        R_inf[(node1, node2)] = r1
+                    else:
+                        R_inf[(node1, node2)] = self.__p_quit
+                else:
+                    if len(g2[node2]) > 0:
+                        R_inf[(node1, node2)] = self.__p_quit
+                    else:
+                        R_inf[(node1, node2)] = 1
+
+        # compute all transition probabilities first.
+        t_dict = {}
+        if self.__n_iteration > 1:
+            for node1 in g1.nodes():
+                neighbor_n1 = g1[node1]
+                # the transition probability distribution in the random walks
+                # generating step (uniform distribution over the vertices adjacent
+                # to the current vertex)
+                if len(neighbor_n1) > 0:
+                    p_trans_n1 = (1 - self.__p_quit) / len(neighbor_n1)
+                    for node2 in g2.nodes():
+                        neighbor_n2 = g2[node2]
+                        if len(neighbor_n2) > 0:
+                            p_trans_n2 = (1 - self.__p_quit) / len(neighbor_n2)
+                            for neighbor1 in neighbor_n1:
+                                for neighbor2 in neighbor_n2:
+                                    t_dict[(node1, node2, neighbor1, neighbor2)] = \
+                                        p_trans_n1 * p_trans_n2 * \
+                                        deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \
+                                        deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels))
+
+        # calculate R_inf with a simple iterative method
+        for i in range(2, self.__n_iteration + 1):
+            R_inf_old = R_inf.copy()
+
+            # calculate R_inf for each pair of nodes
+            for node1 in g1.nodes():
+                neighbor_n1 = g1[node1]
+                # the transition probability distribution in the random walks
+                # generating step (uniform distribution over the vertices adjacent
+                # to the current vertex)
+                if len(neighbor_n1) > 0:
+                    for node2 in g2.nodes():
+                        neighbor_n2 = g2[node2]
+                        if len(neighbor_n2) > 0:
+                            R_inf[(node1, node2)] = r1
+                            for neighbor1 in neighbor_n1:
+                                for neighbor2 in neighbor_n2:
+                                    R_inf[(node1, node2)] += \
+                                        (t_dict[(node1, node2, neighbor1, neighbor2)] * \
+                                        R_inf_old[(neighbor1, neighbor2)])  # ref [1] equation (8)
+
+        # add elements of R_inf up and calculate kernel
+        for (n1, n2), value in R_inf.items():
+            s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels))
+            kernel += s * value  # ref [1] equation (6)
+
+        return kernel
+
+
+    def _wrapper_kernel_do(self, itr):
+        i = itr[0]
+        j = itr[1]
+        return i, j, self.__kernel_do(G_gn[i], G_gn[j])
+
+
+    def _wrapper_untotter(self, i):
+        return i, untotterTransformation(self._graphs[i], self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+
+
+    def __add_dummy_labels(self, Gn):
+        if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+            for i in range(len(Gn)):
+                nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
+            self.__node_labels = [SpecialLabel.DUMMY]
+        if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+            for i in range(len(Gn)):
+                nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
+            self.__edge_labels = [SpecialLabel.DUMMY]
\ No newline at end of file
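
A minimal usage sketch of the Marginalized class added above. It assumes the GraphKernel base class exposes a compute(...) entry point that returns the result together with the run time and dispatches to the _compute_* methods (parallel=None for the series paths, parallel='imap_unordered' for the multiprocessing ones), as in upstream gklearn; the toy graphs and the 'atom'/'bond_type' label names are illustrative assumptions, not part of this patch.

    import networkx as nx
    from gklearn.kernels import Marginalized

    # Two toy labeled graphs; the 'atom' / 'bond_type' label names are hypothetical.
    g1 = nx.Graph()
    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
    g1.add_edge(0, 1, bond_type='1')
    g2 = nx.Graph()
    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'O'})])
    g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '2'})])

    graph_kernel = Marginalized(node_labels=['atom'], edge_labels=['bond_type'],
                                ds_infos={'directed': False},
                                p_quit=0.5, n_iteration=10, remove_totters=False)
    # Assumed return convention of gklearn's GraphKernel.compute: result plus run time.
    gram_matrix, run_time = graph_kernel.compute([g1, g2], parallel=None, verbose=0)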
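For the core algorithm itself, here is a self-contained sketch of the fixed-point iteration that __kernel_do implements (equations (6) and (8) of [1]), with the gklearn plumbing stripped out: node labels are compared directly instead of through deltakernel, and the edge-label factor and the degree-0 special cases of the first iteration are omitted for brevity. This is an illustration of the technique, not the patch's code verbatim.

    import networkx as nx

    def marginalized_kernel_sketch(g1, g2, node_label='atom', p_quit=0.5, n_iteration=10):
        # uniform initial distributions over the nodes of each graph
        p_init_1 = 1 / g1.number_of_nodes()
        p_init_2 = 1 / g2.number_of_nodes()
        # 1st iteration: both random walks stop immediately, each with probability p_quit.
        R = {(u, v): p_quit * p_quit for u in g1 for v in g2}
        for _ in range(2, n_iteration + 1):
            R_old = R.copy()
            for u in g1:
                for v in g2:
                    r = p_quit * p_quit
                    # transition probabilities: uniform over the neighbors of u and v
                    p_u = (1 - p_quit) / max(g1.degree[u], 1)
                    p_v = (1 - p_quit) / max(g2.degree[v], 1)
                    for nu in g1[u]:
                        for nv in g2[v]:
                            if g1.nodes[nu][node_label] == g2.nodes[nv][node_label]:
                                r += p_u * p_v * R_old[(nu, nv)]  # ref [1] equation (8)
                    R[(u, v)] = r
        # weight by the start probabilities and the start-label match; ref [1] equation (6)
        return sum(p_init_1 * p_init_2 * R[(u, v)]
                   for u in g1 for v in g2
                   if g1.nodes[u][node_label] == g2.nodes[v][node_label])

With the toy g1 and g2 from the previous sketch, marginalized_kernel_sketch(g1, g2) tracks the value __kernel_do would produce, up to the omitted edge-label factor.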