From c74c87e0cb46d3f7927d2493fe93f1ae25e495a1 Mon Sep 17 00:00:00 2001
From: linlin
Date: Sun, 4 Oct 2020 19:17:07 +0200
Subject: [PATCH] New translations utils.py (French)

---
 lang/fr/gklearn/utils/utils.py | 605 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 605 insertions(+)
 create mode 100644 lang/fr/gklearn/utils/utils.py

diff --git a/lang/fr/gklearn/utils/utils.py b/lang/fr/gklearn/utils/utils.py
new file mode 100644
index 0000000..c32169d
--- /dev/null
+++ b/lang/fr/gklearn/utils/utils.py
@@ -0,0 +1,605 @@
+import networkx as nx
+import numpy as np
+from copy import deepcopy
+from enum import Enum, unique
+# from itertools import product
+
+# from tqdm import tqdm
+
+
+def getSPLengths(G1):
+    sp = nx.shortest_path(G1)
+    distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes()))
+    for i in sp.keys():
+        for j in sp[i].keys():
+            distances[i, j] = len(sp[i][j]) - 1
+    return distances
+
+
+def getSPGraph(G, edge_weight=None):
+    """Transform graph G to its corresponding shortest-paths graph.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+        The graph to be transformed.
+    edge_weight : string
+        Edge attribute corresponding to the edge weight.
+
+    Return
+    ------
+    S : NetworkX graph
+        The shortest-paths graph corresponding to G.
+
+    Notes
+    -----
+    For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, and there is an edge in S between every pair of nodes that is connected by a walk in G. Every edge in S is labeled by the shortest distance between its two end nodes.
+
+    References
+    ----------
+    .. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
+    """
+    return floydTransformation(G, edge_weight=edge_weight)
+
+
+def floydTransformation(G, edge_weight=None):
+    """Transform graph G to its corresponding shortest-paths graph using the Floyd transformation.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+        The graph to be transformed.
+    edge_weight : string
+        Edge attribute corresponding to the edge weight. The default edge weight is bond_type.
+
+    Return
+    ------
+    S : NetworkX graph
+        The shortest-paths graph corresponding to G.
+
+    References
+    ----------
+    .. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
+    """
+    spMatrix = nx.floyd_warshall_numpy(G, weight=edge_weight)
+    S = nx.Graph()
+    S.add_nodes_from(G.nodes(data=True))
+    ns = list(G.nodes())
+    for i in range(0, G.number_of_nodes()):
+        for j in range(i + 1, G.number_of_nodes()):
+            if spMatrix[i, j] != np.inf:
+                S.add_edge(ns[i], ns[j], cost=spMatrix[i, j])
+    return S
+
+
+def get_shortest_paths(G, weight, directed):
+    """Get all shortest paths of a graph.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+        The graph whose paths are calculated.
+    weight : string/None
+        Edge attribute used as weight to calculate the shortest paths.
+    directed : boolean
+        Whether the graph is directed.
+
+    Return
+    ------
+    sp : list of list
+        List of shortest paths of the graph, where each path is represented by a list of nodes.
+    """
+    from itertools import combinations
+    sp = []
+    for n1, n2 in combinations(G.nodes(), 2):
+        try:
+            spltemp = list(nx.all_shortest_paths(G, n1, n2, weight=weight))
+        except nx.NetworkXNoPath:  # nodes not connected
+            pass
+        else:
+            sp += spltemp
+            # each path is counted twice, once from each of its two end nodes;
+            # for undirected graphs the reversed copies are added as well.
+            if not directed:
+                sp += [sptemp[::-1] for sptemp in spltemp]
+
+    # add single nodes as length 0 paths.
+    sp += [[n] for n in G.nodes()]
+    return sp
+
+
+def untotterTransformation(G, node_label, edge_label):
+    """Transform graph G according to Mahé et al.'s work to filter out tottering patterns of the marginalized kernel and the tree pattern kernel.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+        The graph to be transformed.
+    node_label : string
+        Node attribute used as label. The default node label is 'atom'.
+    edge_label : string
+        Edge attribute used as label. The default edge label is 'bond_type'.
+
+    Return
+    ------
+    gt : NetworkX graph
+        The transformed graph corresponding to G.
+
+    References
+    ----------
+    .. [1] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and Jean-Philippe Vert. Extensions of marginalized graph kernels. In Proceedings of the twenty-first international conference on Machine learning, page 70. ACM, 2004.
+    """
+    # make the graph directed so that both orientations of each edge are handled.
+    G = G.to_directed()
+    gt = nx.Graph()
+    gt.graph = G.graph
+    gt.add_nodes_from(G.nodes(data=True))
+    for edge in G.edges():
+        gt.add_node(edge)
+        gt.nodes[edge].update({node_label: G.nodes[edge[1]][node_label]})
+        gt.add_edge(edge[0], edge)
+        gt.edges[edge[0], edge].update(
+            {edge_label: G[edge[0]][edge[1]][edge_label]})
+        for neighbor in G[edge[1]]:
+            if neighbor != edge[0]:
+                gt.add_edge(edge, (edge[1], neighbor))
+                gt.edges[edge, (edge[1], neighbor)].update(
+                    {edge_label: G[edge[1]][neighbor][edge_label]})
+    # nx.draw_networkx(gt)
+    # plt.show()
+
+    # relabel nodes using consecutive integers for convenience of kernel calculation.
+    gt = nx.convert_node_labels_to_integers(
+        gt, first_label=0, label_attribute='label_orignal')
+    return gt
+
+
+def direct_product(G1, G2, node_label, edge_label):
+    """Return the direct/tensor product of directed graphs G1 and G2.
+
+    Parameters
+    ----------
+    G1, G2 : NetworkX graph
+        The original graphs.
+    node_label : string
+        Node attribute used as label. The default node label is 'atom'.
+    edge_label : string
+        Edge attribute used as label. The default edge label is 'bond_type'.
+
+    Return
+    ------
+    gt : NetworkX graph
+        The direct product graph of G1 and G2.
+
+    Notes
+    -----
+    This method differs from networkx.tensor_product in that it only adds nodes and edges of G1 and G2 that have the same labels to the direct product graph.
+
+    References
+    ----------
+    .. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003.
+    """
+    from itertools import product
+    # G = G.to_directed()
+    gt = nx.DiGraph()
+    # add nodes: only pairs of nodes carrying the same label.
+    for u, v in product(G1, G2):
+        if G1.nodes[u][node_label] == G2.nodes[v][node_label]:
+            gt.add_node((u, v))
+            gt.nodes[(u, v)].update({node_label: G1.nodes[u][node_label]})
+    # add edges; faster for sparse graphs (not too many edges), which is the most common case for now.
+    for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
+        if (u1, u2) in gt and (v1, v2) in gt \
+                and G1.edges[u1, v1][edge_label] == G2.edges[u2, v2][edge_label]:
+            gt.add_edge((u1, u2), (v1, v2))
+            gt.edges[(u1, u2), (v1, v2)].update(
+                {edge_label: G1.edges[u1, v1][edge_label]})
+
+    # # add edges; faster for dense graphs (many edges, complete graphs benefit the most).
+    # for u, v in product(gt, gt):
+    #     if (u[0], v[0]) in G1.edges and (u[1], v[1]) in G2.edges \
+    #             and G1.edges[u[0], v[0]][edge_label] == G2.edges[u[1], v[1]][edge_label]:
+    #         gt.add_edge((u[0], u[1]), (v[0], v[1]))
+    #         gt.edges[(u[0], u[1]), (v[0], v[1])].update(
+    #             {edge_label: G1.edges[u[0], v[0]][edge_label]})
+
+    # relabel nodes using consecutive integers for convenience of kernel calculation.
+    # gt = nx.convert_node_labels_to_integers(
+    #     gt, first_label=0, label_attribute='label_orignal')
+    return gt
+
+
+def direct_product_graph(G1, G2, node_labels, edge_labels):
+    """Return the direct/tensor product of directed graphs G1 and G2.
+
+    Parameters
+    ----------
+    G1, G2 : NetworkX graph
+        The original graphs.
+    node_labels : list
+        A list of node attributes used as labels.
+    edge_labels : list
+        A list of edge attributes used as labels.
+
+    Return
+    ------
+    gt : NetworkX graph
+        The direct product graph of G1 and G2.
+
+    Notes
+    -----
+    This method differs from networkx.tensor_product in that it only adds nodes and edges of G1 and G2 that have the same labels to the direct product graph.
+
+    References
+    ----------
+    .. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003.
+    """
+    from itertools import product
+    # G = G.to_directed()
+    gt = nx.DiGraph()
+    # add nodes: only pairs of nodes carrying the same (multi-dimensional) label.
+    for u, v in product(G1, G2):
+        label1 = tuple(G1.nodes[u][nl] for nl in node_labels)
+        label2 = tuple(G2.nodes[v][nl] for nl in node_labels)
+        if label1 == label2:
+            gt.add_node((u, v), node_label=label1)
+
+    # add edges; faster for sparse graphs (not too many edges), which is the most common case for now.
+    for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
+        if (u1, u2) in gt and (v1, v2) in gt:
+            label1 = tuple(G1.edges[u1, v1][el] for el in edge_labels)
+            label2 = tuple(G2.edges[u2, v2][el] for el in edge_labels)
+            if label1 == label2:
+                gt.add_edge((u1, u2), (v1, v2), edge_label=label1)
+
+    # # add edges; faster for dense graphs (many edges, complete graphs benefit the most).
+    # for u, v in product(gt, gt):
+    #     if (u[0], v[0]) in G1.edges and (u[1], v[1]) in G2.edges \
+    #             and G1.edges[u[0], v[0]][edge_label] == G2.edges[u[1], v[1]][edge_label]:
+    #         gt.add_edge((u[0], u[1]), (v[0], v[1]))
+    #         gt.edges[(u[0], u[1]), (v[0], v[1])].update(
+    #             {edge_label: G1.edges[u[0], v[0]][edge_label]})
+
+    # relabel nodes using consecutive integers for convenience of kernel calculation.
+    # gt = nx.convert_node_labels_to_integers(
+    #     gt, first_label=0, label_attribute='label_orignal')
+    return gt
+
+
+def graph_deepcopy(G):
+    """Deep copy a graph, including a deep copy of all nodes, edges and
+    attributes of the graph, its nodes and its edges.
+
+    Note
+    ----
+    As far as I know, this is the same as the NetworkX method graph.copy().
+    """
+    # add graph attributes.
+    labels = {}
+    for k, v in G.graph.items():
+        labels[k] = deepcopy(v)
+    if G.is_directed():
+        G_copy = nx.DiGraph(**labels)
+    else:
+        G_copy = nx.Graph(**labels)
+
+    # add nodes.
+    for nd, attrs in G.nodes(data=True):
+        labels = {}
+        for k, v in attrs.items():
+            labels[k] = deepcopy(v)
+        G_copy.add_node(nd, **labels)
+
+    # add edges.
+    for nd1, nd2, attrs in G.edges(data=True):
+        labels = {}
+        for k, v in attrs.items():
+            labels[k] = deepcopy(v)
+        G_copy.add_edge(nd1, nd2, **labels)
+
+    return G_copy
+
+
+def graph_isIdentical(G1, G2):
+    """Check if two graphs are identical, including: same nodes, edges, node
+    labels/attributes and edge labels/attributes.
+
+    Notes
+    -----
+    1. The two graphs must be of the same type.
+
+    2. Global/graph attributes are neglected, as they may contain the names of the graphs.
+    """
+    # check nodes.
+    nlist1 = [n for n in G1.nodes(data=True)]
+    nlist2 = [n for n in G2.nodes(data=True)]
+    if not nlist1 == nlist2:
+        return False
+    # check edges.
+    elist1 = [n for n in G1.edges(data=True)]
+    elist2 = [n for n in G2.edges(data=True)]
+    if not elist1 == elist2:
+        return False
+    # graph attributes are not checked (see Notes).
+
+    return True
+
+
+def get_node_labels(Gn, node_label):
+    """Get node labels of dataset Gn.
+    """
+    nl = set()
+    for G in Gn:
+        nl = nl | set(nx.get_node_attributes(G, node_label).values())
+    return nl
+
+
+def get_edge_labels(Gn, edge_label):
+    """Get edge labels of dataset Gn.
+    """
+    el = set()
+    for G in Gn:
+        el = el | set(nx.get_edge_attributes(G, edge_label).values())
+    return el
+
+
+def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}):
+    if name == 'Marginalized':
+        from gklearn.kernels import Marginalized
+        graph_kernel = Marginalized(node_labels=node_labels,
+                                    edge_labels=edge_labels,
+                                    ds_infos=ds_infos,
+                                    **kernel_options)
+    elif name == 'ShortestPath':
+        from gklearn.kernels import ShortestPath
+        graph_kernel = ShortestPath(node_labels=node_labels,
+                                    node_attrs=node_attrs,
+                                    ds_infos=ds_infos,
+                                    **kernel_options)
+    elif name == 'StructuralSP':
+        from gklearn.kernels import StructuralSP
+        graph_kernel = StructuralSP(node_labels=node_labels,
+                                    edge_labels=edge_labels,
+                                    node_attrs=node_attrs,
+                                    edge_attrs=edge_attrs,
+                                    ds_infos=ds_infos,
+                                    **kernel_options)
+    elif name == 'PathUpToH':
+        from gklearn.kernels import PathUpToH
+        graph_kernel = PathUpToH(node_labels=node_labels,
+                                 edge_labels=edge_labels,
+                                 ds_infos=ds_infos,
+                                 **kernel_options)
+    elif name == 'Treelet':
+        from gklearn.kernels import Treelet
+        graph_kernel = Treelet(node_labels=node_labels,
+                               edge_labels=edge_labels,
+                               ds_infos=ds_infos,
+                               **kernel_options)
+    elif name == 'WLSubtree':
+        from gklearn.kernels import WLSubtree
+        graph_kernel = WLSubtree(node_labels=node_labels,
+                                 edge_labels=edge_labels,
+                                 ds_infos=ds_infos,
+                                 **kernel_options)
+    elif name == 'WeisfeilerLehman':
+        from gklearn.kernels import WeisfeilerLehman
+        graph_kernel = WeisfeilerLehman(node_labels=node_labels,
+                                        edge_labels=edge_labels,
+                                        ds_infos=ds_infos,
+                                        **kernel_options)
+    else:
+        raise Exception('The graph kernel given is not defined. Possible choices include: "Marginalized", "ShortestPath", "StructuralSP", "PathUpToH", "Treelet", "WLSubtree", "WeisfeilerLehman".')
+
+    return graph_kernel
+
+
+def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None, edge_required=False):
+    import os
+    from gklearn.utils import Dataset, split_dataset_by_target
+
+    # 1. get dataset.
+    print('1. getting dataset...')
+    dataset_all = Dataset()
+    dataset_all.load_predefined_dataset(ds_name)
+    dataset_all.trim_dataset(edge_required=edge_required)
+    if irrelevant_labels is not None:
+        dataset_all.remove_labels(**irrelevant_labels)
+    # dataset_all.cut_graphs(range(0, 10))
+    datasets = split_dataset_by_target(dataset_all)
+
+    gram_matrix_unnorm_list = []
+    run_time_list = []
+
+    print('start generating preimage for each class of target...')
+    for idx, dataset in enumerate(datasets):
+        target = dataset.targets[0]
+        print('\ntarget =', target, '\n')
+
+        # 2. initialize graph kernel.
+        print('2. initializing graph kernel and setting parameters...')
+        graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
+                                                node_labels=dataset.node_labels,
+                                                edge_labels=dataset.edge_labels,
+                                                node_attrs=dataset.node_attrs,
+                                                edge_attrs=dataset.edge_attrs,
+                                                ds_infos=dataset.get_dataset_infos(keys=['directed']),
+                                                kernel_options=kernel_options)
+
+        # 3. compute gram matrix.
+        print('3. computing gram matrix...')
+        gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options)
+        gram_matrix_unnorm = graph_kernel.gram_matrix_unnorm
+
+        gram_matrix_unnorm_list.append(gram_matrix_unnorm)
+        run_time_list.append(run_time)
+
+    # 4. save results.
+    print()
+    print('4. saving results...')
+    if save_results:
+        if not os.path.exists(dir_save):
+            os.makedirs(dir_save)
+        np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list)
+
+    print('\ncomplete.')
+
+
+def find_paths(G, source_node, length):
+    """Find all paths of a given length that start from a source node.
+    A recursive depth-first search is applied.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+        The graph in which paths are searched.
+    source_node : integer
+        The node from which all paths start.
+    length : integer
+        The length of the paths.
+
+    Return
+    ------
+    path : list of list
+        List of paths retrieved, where each path is represented by a list of nodes.
+    """
+    if length == 0:
+        return [[source_node]]
+    path = [[source_node] + path for neighbor in G[source_node]
+            for path in find_paths(G, neighbor, length - 1) if source_node not in path]
+    return path
+
+
+def find_all_paths(G, length, is_directed):
+    """Find all paths of a given length in a graph. A recursive depth-first
+    search is applied.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+        The graph in which paths are searched.
+    length : integer
+        The length of the paths.
+    is_directed : boolean
+        Whether the graph is directed.
+
+    Return
+    ------
+    path : list of list
+        List of paths retrieved, where each path is represented by a list of nodes.
+    """
+    all_paths = []
+    for node in G:
+        all_paths.extend(find_paths(G, node, length))
+
+    if not is_directed:
+        # In an undirected graph each path is found twice, once from each of its
+        # two end nodes; remove one of the two copies.
+        all_paths_r = [path[::-1] for path in all_paths]
+        for idx, path in enumerate(all_paths[:-1]):
+            for path2 in all_paths_r[idx + 1::]:
+                if path == path2:
+                    all_paths[idx] = []
+                    break
+        all_paths = list(filter(lambda a: a != [], all_paths))
+
+    return all_paths
+
+
+def get_mlti_dim_node_attrs(G, attr_names):
+    """Get multi-dimensional node attributes of graph G, one tuple per node."""
+    attributes = []
+    for nd, attrs in G.nodes(data=True):
+        attributes.append(tuple(attrs[aname] for aname in attr_names))
+    return attributes
+
+
+def get_mlti_dim_edge_attrs(G, attr_names):
+    """Get multi-dimensional edge attributes of graph G, one tuple per edge."""
+    attributes = []
+    for ed, attrs in G.edges(data=True):
+        attributes.append(tuple(attrs[aname] for aname in attr_names))
+    return attributes
+
+
+def normalize_gram_matrix(gram_matrix):
+    """Normalize a Gram matrix in place so that all diagonal entries equal 1."""
+    diag = gram_matrix.diagonal().copy()
+    for i in range(len(gram_matrix)):
+        for j in range(i, len(gram_matrix)):
+            gram_matrix[i][j] /= np.sqrt(diag[i] * diag[j])
+            gram_matrix[j][i] = gram_matrix[i][j]
+    return gram_matrix
+
+
+def compute_distance_matrix(gram_matrix):
+    """Compute the kernel-induced distance matrix of a Gram matrix, along with
+    its maximum, minimum (over non-zero entries) and mean values."""
+    dis_mat = np.empty((len(gram_matrix), len(gram_matrix)))
+    for i in range(len(gram_matrix)):
+        for j in range(i, len(gram_matrix)):
+            dis = gram_matrix[i, i] + gram_matrix[j, j] - 2 * gram_matrix[i, j]
+            if dis < 0:
+                if dis > -1e-10:
+                    dis = 0
+                else:
+                    raise ValueError('The distance is negative.')
+            dis_mat[i, j] = np.sqrt(dis)
+            dis_mat[j, i] = dis_mat[i, j]
+    dis_max = np.max(np.max(dis_mat))
+    dis_min = np.min(np.min(dis_mat[dis_mat != 0]))
+    dis_mean = np.mean(np.mean(dis_mat))
+    return dis_mat, dis_max, dis_min, dis_mean
+
+
+def dummy_node():
+    """
+    /*!
+     * @brief Returns a dummy node.
+     * @return ID of dummy node.
+     */
+    """
+    return np.inf  # @todo: in GEDLIB, this is the max - 1 rather than max, I don't know why.
+
+
+def undefined_node():
+    """
+    /*!
+     * @brief Returns an undefined node.
+     * @return ID of undefined node.
+     */
+    """
+    return np.inf
+
+
+def dummy_edge():
+    """
+    /*!
+     * @brief Returns a dummy edge.
+     * @return ID of dummy edge.
+     */
+    """
+    return np.inf
+
+
+@unique
+class SpecialLabel(Enum):
+    """Can be used to define special labels.
+    """
+    DUMMY = 1  # The dummy label.
+    # DUMMY = auto  # enum.auto does not exist in Python 3.5.
\ No newline at end of file
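
The docstrings of getSPGraph and floydTransformation above describe the shortest-paths transformation; the snippet below is a minimal usage sketch, not part of the patch, which assumes those functions are in scope (for instance imported from the gklearn.utils.utils module added here).

import networkx as nx

# A small unweighted graph: 0 - 1 - 2 - 3.
G = nx.path_graph(4)
# With edge_weight=None every edge counts as weight 1.
S = getSPGraph(G)
# S keeps the nodes of G and connects every pair of nodes reachable in G;
# each edge carries the shortest-path length in its 'cost' attribute.
for u, v, cost in sorted(S.edges(data='cost')):
    print(u, v, cost)   # e.g. the pair (0, 3) gets cost 3.0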
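
A similar sketch for direct_product on two tiny labeled graphs, using the default 'atom' and 'bond_type' labels mentioned in its docstring; the node identifiers and label values below are made up for illustration.

import networkx as nx

G1 = nx.DiGraph()
G1.add_node(0, atom='C')
G1.add_node(1, atom='O')
G1.add_edge(0, 1, bond_type='single')

G2 = nx.DiGraph()
G2.add_node('a', atom='C')
G2.add_node('b', atom='O')
G2.add_edge('a', 'b', bond_type='single')

gp = direct_product(G1, G2, 'atom', 'bond_type')
print(list(gp.nodes(data=True)))  # only label-compatible pairs: (0, 'a') and (1, 'b')
print(list(gp.edges(data=True)))  # one edge ((0, 'a'), (1, 'b')) labeled 'single'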
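
find_paths and find_all_paths enumerate fixed-length paths by recursive depth-first search; a small sketch of the undirected case, where reversed duplicates are removed (again assuming the functions are in scope):

import networkx as nx

G = nx.cycle_graph(4)  # 0 - 1 - 2 - 3 - 0
paths = find_all_paths(G, 2, is_directed=False)
print(paths)
# Each length-2 path appears exactly once, e.g. [0, 1, 2] is kept
# while its reversed copy [2, 1, 0] is dropped.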
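
normalize_gram_matrix divides each entry k(i, j) by sqrt(k(i, i) * k(j, j)), and compute_distance_matrix turns a Gram matrix into the kernel-induced distances d(i, j) = sqrt(k(i, i) + k(j, j) - 2 * k(i, j)). A minimal sketch on a hand-made Gram matrix (a linear kernel on three toy vectors, not data from the library):

import numpy as np

X = np.array([[1.0, 0.0], [1.0, 1.0], [0.0, 2.0]])
K = X @ X.T                                  # a valid (positive semidefinite) Gram matrix

K_norm = normalize_gram_matrix(K.copy())     # unit diagonal after normalization
dis_mat, dis_max, dis_min, dis_mean = compute_distance_matrix(K.copy())

print(np.round(K_norm, 3))
print(np.round(dis_mat, 3))                  # zero diagonal, symmetric distances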