Browse Source

Add experiment of random preimage.

v0.2.x
jajupmochi 5 years ago
parent
commit
50da423a1c
4 changed files with 1382 additions and 1 deletions
  1. +1
    -0
      gklearn/preimage/__init__.py
  2. +1192
    -0
      gklearn/preimage/experiments/xp_random_preimage.py
  3. +188
    -0
      gklearn/preimage/generate_random_preimages_by_class.py
  4. +1
    -1
      gklearn/preimage/utils.py

+ 1
- 0
gklearn/preimage/__init__.py View File

@@ -14,3 +14,4 @@ from gklearn.preimage.preimage_generator import PreimageGenerator
from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator
from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator
from gklearn.preimage.kernel_knn_cv import kernel_knn_cv
from gklearn.preimage.generate_random_preimages_by_class import generate_random_preimages_by_class

+ 1192
- 0
gklearn/preimage/experiments/xp_random_preimage.py
File diff suppressed because it is too large
View File


+ 188
- 0
gklearn/preimage/generate_random_preimages_by_class.py View File

@@ -0,0 +1,188 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 1 17:02:51 2020

@author: ljia
"""

import numpy as np
from gklearn.utils import Dataset
import csv
import os
import os.path
from gklearn.preimage import RandomPreimageGenerator
from gklearn.utils import split_dataset_by_target
from gklearn.utils.graphfiles import saveGXL


def _append_csv_row(path, row):
    """Append one row to the CSV file at ``path``."""
    # newline='' is what the csv module asks for: the writer emits its own
    # line terminator, and letting the text layer translate it again yields
    # blank rows on Windows.
    with open(path, 'a', newline='') as f_out:
        csv.writer(f_out).writerow(row)


def _load_gram_matrices(gm_fname):
    """Return ``(gram_matrices, run_times)`` read from the ``.npz`` file ``gm_fname``."""
    # NOTE(security): allow_pickle=True unpickles the archive content, which
    # can execute arbitrary code. Only load files written by this module.
    with np.load(gm_fname, allow_pickle=True) as gmfile:
        gram_matrices = list(gmfile['gram_matrix_unnorm_list'])
        run_times = gmfile['run_time_list'].tolist()
    return gram_matrices, run_times


def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=True, save_preimages=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False, cut_range=None):
    """Generate one random preimage per target class of a predefined dataset.

    The dataset ``ds_name`` is loaded, trimmed, optionally stripped of labels
    and cut, then split by target. For every class holding at least two
    graphs a ``RandomPreimageGenerator`` is run; classes with fewer graphs
    are skipped.

    Parameters
    ----------
    ds_name : str
        Name given to ``Dataset.load_predefined_dataset``; also part of the
        output file names.
    rpg_options : dict
        Keyword options forwarded to ``RandomPreimageGenerator.set_options``.
        The dict is copied for each class and is not modified.
    kernel_options : dict
        Kernel options; must contain the key ``'name'``. A copy is assigned
        to the generator's ``kernel_options``.
    save_results : bool
        If true, append one row per class to the detail and summary CSV
        files and a final ``'all'`` row (means over the processed classes)
        to the summary file.
    save_preimages : bool
        If true, write the best graph from the dataset and the preimage of
        each class as GXL files under ``dir_save + 'preimages/'``.
    load_gm : 'auto' or bool
        ``'auto'``: load the stored unnormalized Gram matrices if their file
        exists, otherwise compute them and store them at the end.
        Falsy: always compute and store. Any other truthy value: load the
        file, which must then exist.
    dir_save : str
        Prefix concatenated as-is with every output file name, so it should
        be empty or end with a path separator.
    irrelevant_labels : dict or None
        If not None, passed as keyword arguments to ``Dataset.remove_labels``.
    edge_required : bool
        Passed to ``Dataset.trim_dataset``.
    cut_range : object or None
        If not None, passed to ``Dataset.cut_graphs``.

    Returns
    -------
    None
    """
    # 1. get dataset.
    print('1. getting dataset...')
    dataset_all = Dataset()
    dataset_all.load_predefined_dataset(ds_name)
    dataset_all.trim_dataset(edge_required=edge_required)
    if irrelevant_labels is not None:
        dataset_all.remove_labels(**irrelevant_labels)
    if cut_range is not None:
        dataset_all.cut_graphs(cut_range)
    datasets = split_dataset_by_target(dataset_all)

    kernel_name = kernel_options['name']

    if save_results:
        # create result files.
        print('creating output files...')
        fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_name, dir_save)

    # Per-class statistics of THIS run. They are kept apart from the run
    # times loaded from disk so that the final means are not diluted by the
    # loaded values.
    dis_k_dataset_list = []
    dis_k_preimage_list = []
    time_precompute_gm_list = []
    time_generate_list = []
    time_total_list = []
    itrs_list = []
    num_updates_list = []

    # np.savez appends '.npz' to a file name that lacks it.
    gm_basename = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_name + '.gm'
    gm_fname = gm_basename + '.npz'
    if load_gm == 'auto':
        gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
    else:
        # An explicit truthy request means the file is expected to be there
        # (np.load raises otherwise); a falsy one means recompute. Defining
        # the flag on every path avoids a NameError further down.
        gmfile_exist = bool(load_gm)
    if gmfile_exist:
        gram_matrix_unnorm_list, loaded_runtimes = _load_gram_matrices(gm_fname)
    else:
        gram_matrix_unnorm_list, loaded_runtimes = [], []
    # Matrices are stored at the end exactly when they were not loaded.
    save_gm = not gmfile_exist

    print('starting generating preimage for each class of target...')
    idx_offset = 0  # number of classes skipped so far.
    num_graphs = 0  # stays defined for the final row even without classes.
    for idx, dataset in enumerate(datasets):
        target = dataset.targets[0]
        print('\ntarget =', target, '\n')

        num_graphs = len(dataset.graphs)
        if num_graphs < 2:
            print('\nnumber of graphs = ', num_graphs, ', skip.\n')
            idx_offset += 1
            continue

        # 2. set parameters.
        print('2. initializing mpg and setting parameters...')
        class_options = rpg_options.copy()
        if gmfile_exist:
            # Stored lists only hold entries for classes that were not
            # skipped, hence the shift by the number of skipped classes.
            class_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx - idx_offset]
            class_options['runtime_precompute_gm'] = loaded_runtimes[idx - idx_offset]
        rpg = RandomPreimageGenerator()
        rpg.dataset = dataset
        rpg.set_options(**class_options)
        rpg.kernel_options = kernel_options.copy()

        # 3. compute preimage.
        print('3. computing preimage...')
        rpg.run()
        results = rpg.get_results()

        # Collected for every processed class, whether or not CSV output is
        # requested, so the run times stored next to the Gram matrices stay
        # aligned with the matrices.
        dis_k_dataset_list.append(results['k_dis_dataset'])
        dis_k_preimage_list.append(results['k_dis_preimage'])
        time_precompute_gm_list.append(results['runtime_precompute_gm'])
        time_generate_list.append(results['runtime_generate_preimage'])
        time_total_list.append(results['runtime_total'])
        itrs_list.append(results['itrs'])
        num_updates_list.append(results['num_updates'])

        # 4. save results (and preimages).
        print('4. saving results (and preimages)...')
        if save_results:
            print('writing results to files...')
            stats = [results['k_dis_dataset'], results['k_dis_preimage'],
                     results['runtime_precompute_gm'],
                     results['runtime_generate_preimage'], results['runtime_total'],
                     results['itrs'], results['num_updates']]
            # The detail file has an extra 'repeat' column, always 1 here.
            _append_csv_row(dir_save + fn_output_detail,
                            [ds_name, kernel_name, num_graphs, target, 1] + stats)
            _append_csv_row(dir_save + fn_output_summary,
                            [ds_name, kernel_name, num_graphs, target] + stats)

        # save preimages.
        if save_preimages:
            preimage_dir = dir_save + 'preimages/'
            os.makedirs(preimage_dir, exist_ok=True)
            print('Saving preimages to files...')
            suffix = 'nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
            label_kwargs = dict(node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
                                node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
            fn_best_dataset = preimage_dir + 'g_best_dataset.' + suffix
            saveGXL(rpg.best_from_dataset, fn_best_dataset + '.gxl', method='default', **label_kwargs)
            fn_preimage = preimage_dir + 'g_preimage.' + suffix
            saveGXL(rpg.preimage, fn_preimage + '.gxl', method='default', **label_kwargs)

        if save_gm:
            gram_matrix_unnorm_list.append(rpg.gram_matrix_unnorm)

    # write result summary over all classes.
    if save_results:
        # NOTE(review): 'num graphs' in this row is the size of the last
        # class visited, not a total -- confirm that this is intended.
        _append_csv_row(dir_save + fn_output_summary,
                        [ds_name, kernel_name, num_graphs, 'all',
                         np.mean(dis_k_dataset_list), np.mean(dis_k_preimage_list),
                         np.mean(time_precompute_gm_list),
                         np.mean(time_generate_list), np.mean(time_total_list),
                         np.mean(itrs_list), np.mean(num_updates_list)])

    # write Gram matrices to file.
    if save_gm:
        # Classes usually differ in size, so the matrices form a ragged
        # list. Recent NumPy refuses to build an array from one implicitly,
        # hence the explicit object array with one matrix per slot.
        gm_array = np.empty(len(gram_matrix_unnorm_list), dtype=object)
        for pos, gram_matrix in enumerate(gram_matrix_unnorm_list):
            gm_array[pos] = gram_matrix
        np.savez(gm_basename, gram_matrix_unnorm_list=gm_array, run_time_list=time_precompute_gm_list)

    print('\ncomplete.\n')

def __init_output_file_preimage(ds_name, gkernel, dir_output):
    """Create the result CSV files (if needed) and append their header rows.

    Parameters
    ----------
    ds_name : str
        Dataset name, used in the file names.
    gkernel : str
        Graph kernel name, used in the file names.
    dir_output : str
        Prefix concatenated as-is with the file names, so it should be empty
        (current directory) or end with a path separator. The directory is
        created when it does not exist.

    Returns
    -------
    tuple of (str, str)
        File names (without ``dir_output``) of the detail file and of the
        summary file.

    Notes
    -----
    Both files are opened in append mode, so a header row is added on every
    call, including when the files already hold earlier results.
    """
    # An empty prefix means the current directory; os.makedirs('') would
    # raise FileNotFoundError. exist_ok avoids the check-then-create race.
    if dir_output:
        os.makedirs(dir_output, exist_ok=True)

    stat_columns = ['dis_k best from dataset', 'dis_k preimage',
                    'time precompute gm', 'time generate preimage', 'time total',
                    'itrs', 'num updates']

    # newline='' is what the csv module asks for, so that its line
    # terminator is not translated a second time (blank rows on Windows).
    fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
    with open(dir_output + fn_output_detail, 'a', newline='') as f_detail:
        # The detail file carries an extra 'repeat' column.
        csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'num graphs',
                                       'target', 'repeat'] + stat_columns)

    fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv'
    with open(dir_output + fn_output_summary, 'a', newline='') as f_summary:
        csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'num graphs',
                                        'target'] + stat_columns)

    return fn_output_detail, fn_output_summary

+ 1
- 1
gklearn/preimage/utils.py View File

@@ -256,7 +256,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
if (load_gm == 'auto' and not gmfile_exist) or not load_gm:
gram_matrix_unnorm_list.append(mpg.gram_matrix_unnorm)

# write result summary for each letter.
# write result summary for each class.
if save_results:
sod_sm_mean = np.mean(sod_sm_list)
sod_gm_mean = np.mean(sod_gm_list)


Loading…
Cancel
Save