New translations accuracy_diff_entropy.py (French)

4 years ago · 3efcee0322
--- a/lang/fr/gklearn/experiments/papers/PRL_2020/accuracy_diff_entropy.py
+++ b/lang/fr/gklearn/experiments/papers/PRL_2020/accuracy_diff_entropy.py
@@ -0,0 +1,186 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Created on Mon Oct  5 16:08:33 2020

@author: ljia

 This script computes the classification accuracy of each graph kernel on
 datasets whose graphs have different entropies of degree distribution.
 """
 from utils import Graph_Kernel_List, cross_validate
 import numpy as np
 import logging

 # Length of the constant degree sequences used for the first graph set in
 # generate_graphs(). The second graph set does not use this value: its degree
 # sequences are built as list(range(1, 11)) * 6, i.e. 60 entries.
 num_nodes = 40
 # Number of graphs generated per class. Every graph set holds two classes, so
 # it contains 2 * half_num_graphs graphs; the same value is used to slice the
 # sets and to build the target vector in xp_accuracy_diff_entropy().
 half_num_graphs = 100


 def generate_graphs():
 	"""Generate the two synthetic graph sets used by the experiment.

 	Each set is a list of ``2 * half_num_graphs`` graphs: the first half is
 	one class (every node gets ``'atom' = 0``), the second half the other
 	class (every node gets ``'atom' = 1``). All graphs are sampled with
 	``nx.configuration_model(..., create_using=nx.Graph)``.

 	* Set 1: both classes use the constant degree sequence
 	  ``[5] * num_nodes``.
 	* Set 2: both classes use the degree sequence ``1, 2, ..., 10`` repeated
 	  6 times (60 entries).

 	Author's notes on generators that were tried and left disabled:

 	* ``gklearn``'s ``GraphSynthesizer.unified_graphs`` (earliest version).
 	* ``[2] * num_nodes`` for the second class of set 1, and the sequence of
 	  set 2 scaled by 5 for its second class.
 	* ``nx.random_regular_graph``: can easily generate isomorphic graphs.
 	* ``nx.random_degree_sequence_graph``: too slow, and may fail.
 	* ``nx.havel_hakimi_graph``: no randomness.
 	* ``nx.degree_sequence_tree`` for the second class of set 2.
 	* ``nx.expected_degree_graph``: the entropy of the two classes is not
 	  the same (the same remark was attached to the configuration model
 	  currently used for set 2).
 	* ``nx.random_powerlaw_tree``: seems to show no randomness.
 	* A pairwise ``nx.is_isomorphic`` check over each set (debugging aid).

 	NOTE(review): according to the networkx documentation, building the
 	configuration model directly as an ``nx.Graph`` drops parallel edges, so
 	realized node degrees may differ from the requested sequence -- confirm
 	this is acceptable for the experiment.

 	Returns
 	-------
 	tuple
 		``(graphs1, graphs2)``, the two graph sets described above.
 	"""
 	import networkx as nx

 	def _sample_class(degree_sequence):
 		# Draw one class: half_num_graphs independent configuration-model graphs.
 		return [
 			nx.configuration_model(degree_sequence, create_using=nx.Graph)
 			for _ in range(half_num_graphs)
 		]

 	def _set_atom(graph_list, atom):
 		# Store the class id as the 'atom' attribute of every node.
 		for graph in graph_list:
 			for node in graph.nodes():
 				graph.nodes[node]['atom'] = atom

 	# Degree sequences. Within each set both classes share the same sequence.
 	constant_seq_a = [5] * num_nodes
 	constant_seq_b = [5] * num_nodes
 	ramp_seq_a = list(range(1, 11)) * 6
 	ramp_seq_b = list(range(1, 11)) * 6

 	# Set 1: constant degree sequence. The sampling order (class 0, then
 	# class 1) is kept so the random stream is consumed in the same order.
 	graphs11 = _sample_class(constant_seq_a)
 	graphs12 = _sample_class(constant_seq_b)
 	_set_atom(graphs11, 0)
 	_set_atom(graphs12, 1)
 	graphs1 = graphs11 + graphs12

 	# Set 2: degree sequence covering every degree from 1 to 10.
 	graphs21 = _sample_class(ramp_seq_a)
 	graphs22 = _sample_class(ramp_seq_b)
 	_set_atom(graphs21, 0)
 	_set_atom(graphs22, 1)
 	graphs2 = graphs21 + graphs22

 	return graphs1, graphs2


 def get_infos(graph):
 	"""Collect and print degree statistics for a collection of graphs.

 	The graphs are loaded into a ``gklearn.utils.Dataset``, from which the
 	``'all_degree_entropy'`` and ``'ave_node_degree'`` infos are requested.
 	The mean of ``'all_degree_entropy'`` is stored in the result under the
 	extra key ``'ave_degree_entropy'``, and that mean is printed together
 	with ``'ave_node_degree'``.

 	Parameters
 	----------
 	graph
 		The graphs handed to ``Dataset.load_graphs`` (the caller in this file
 		passes a list of networkx graphs).

 	Returns
 	-------
 	The infos object returned by ``Dataset.get_dataset_infos``, extended
 	with the ``'ave_degree_entropy'`` entry.
 	"""
 	from gklearn.utils import Dataset

 	dataset = Dataset()
 	dataset.load_graphs(graph)

 	wanted_keys = ['all_degree_entropy', 'ave_node_degree']
 	infos = dataset.get_dataset_infos(keys=wanted_keys)

 	# Summarize the degree entropies as a single mean value.
 	mean_entropy = np.mean(infos['all_degree_entropy'])
 	infos['ave_degree_entropy'] = mean_entropy

 	print(infos['ave_degree_entropy'], ',', infos['ave_node_degree'])
 	return infos


 def xp_accuracy_diff_entropy():
 	"""Run the accuracy-vs-degree-entropy experiment and pickle the results.

 	Steps:

 	1. Generate the two graph sets with ``generate_graphs()``.
 	2. Print the degree statistics of each class of each set via
 	   ``get_infos()``.
 	3. For every kernel in ``Graph_Kernel_List`` and every graph set, call
 	   ``cross_validate`` on copies of the graphs, with targets ``0`` for the
 	   first ``half_num_graphs`` graphs and ``1`` for the rest.
 	4. Pickle each (accuracy, confidence) pair right after it is computed,
 	   and pickle the complete result dictionaries at the end.

 	If ``cross_validate`` raises, the exception is logged to ``error.txt``
 	in the output directory and the string ``'error'`` is stored as both the
 	accuracy and the confidence, so one failing kernel does not abort the
 	whole run.

 	All files are written under ``outputs/accuracy_diff_entropy/``.

 	Returns
 	-------
 	None
 	"""
 	import os
 	import pickle

 	# Generate graphs.
 	graphs1, graphs2 = generate_graphs()

 	# Compute the entropy of the degree distribution of the generated graphs.
 	# The calls are made for the statistics they print; the returned infos
 	# are not used afterwards.
 	get_infos(graphs1[0:half_num_graphs])
 	get_infos(graphs1[half_num_graphs:])
 	get_infos(graphs2[0:half_num_graphs])
 	get_infos(graphs2[half_num_graphs:])

 	# Prepare the output directory; exist_ok avoids a check-then-create race.
 	save_dir = 'outputs/accuracy_diff_entropy/'
 	os.makedirs(save_dir, exist_ok=True)

 	def _dump(obj, file_name):
 		# Pickle obj into save_dir and close the file handle deterministically.
 		with open(save_dir + file_name, 'wb') as f:
 			pickle.dump(obj, f)

 	accuracies = {}
 	confidences = {}

 	for kernel_name in Graph_Kernel_List:
 		print()
 		print('Kernel:', kernel_name)

 		accuracies[kernel_name] = []
 		confidences[kernel_name] = []
 		for set_i, graphs in enumerate([graphs1, graphs2]):
 			print()
 			print('Graph set', set_i)

 			# Work on copies so every kernel starts from the same unmodified graphs.
 			tmp_graphs = [g.copy() for g in graphs]
 			targets = [0] * half_num_graphs + [1] * half_num_graphs

 			# Placeholders kept in the results if the kernel fails.
 			accuracy = 'error'
 			confidence = 'error'
 			try:
 				accuracy, confidence = cross_validate(tmp_graphs, targets, kernel_name, ds_name=str(set_i), output_dir=save_dir) #, n_jobs=1)
 			except Exception as exp:
 				print('An exception occured when running this experiment:')
 				# Configured lazily so the log file only appears when an error
 				# actually happens; repeated basicConfig calls are no-ops.
 				LOG_FILENAME = save_dir + 'error.txt'
 				logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
 				logging.exception('\n' + kernel_name + ', ' + str(set_i) + ':')
 				print(repr(exp))
 			accuracies[kernel_name].append(accuracy)
 			confidences[kernel_name].append(confidence)

 			# Save the partial results immediately so they survive a later crash.
 			_dump(accuracy, 'accuracy.' + kernel_name + '.' + str(set_i) + '.pkl')
 			_dump(confidence, 'confidence.' + kernel_name + '.' + str(set_i) + '.pkl')

 	# Save all.
 	_dump(accuracies, 'accuracies.pkl')
 	_dump(confidences, 'confidences.pkl')


 # Run the experiment only when this file is executed as a script, not when
 # it is imported.
 if __name__ == '__main__':
 	xp_accuracy_diff_entropy()