|
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Thu Jul 4 12:20:16 2019
-
- @author: ljia
- """
- import numpy as np
- import networkx as nx
- import matplotlib.pyplot as plt
- import time
- from tqdm import tqdm
-
- import sys
- sys.path.insert(0, "../")
- from pygraph.utils.graphfiles import loadDataset
- from median import draw_Letter_graph
-
-
- # --------------------------- These are tests --------------------------------#
-
def test_who_is_the_closest_in_kernel_space(Gn):
    """Rank graphs by their kernel-space distance to two reference graphs.

    A hand-made "median" candidate (a copy of ``Gn[6]`` with node ``0``
    removed) is drawn, printed and put in front of the dataset. The Gram
    matrix of the 'untilhpathkernel' is computed on that extended list and
    converted to a distance matrix. Sorted distances and the matching
    argsort indices are printed for the rows of the two reference graphs
    and for the mean of the distances to both of them.

    Parameters
    ----------
    Gn : list
        Graphs of the dataset. Indices 0 and 6 are used as reference
        graphs; the list itself is not modified.
    """
    # The original body used these two names without importing them, which
    # raised NameError on the first call.
    # NOTE(review): compute_kernel is imported from gk_iam elsewhere in this
    # file; gram2distances is assumed to live there as well - TODO confirm.
    from gk_iam import compute_kernel, gram2distances

    idx_gi = [0, 6]

    # create the "median" graph.
    gnew = Gn[idx_gi[1]].copy()
    gnew.remove_node(0)
    nx.draw_networkx(gnew)
    plt.show()
    print(gnew.nodes(data=True))

    # The candidate sits at position 0, so every dataset index shifts by one.
    graphs = [gnew] + Gn
    row_g1, row_g2 = (i + 1 for i in idx_gi)

    # compute gram matrix, then the distance matrix
    Kmatrix = compute_kernel(graphs, 'untilhpathkernel', True)
    dmatrix = gram2distances(Kmatrix)
    for row in (row_g1, row_g2):
        print(np.sort(dmatrix[row]))
        print(np.argsort(dmatrix[row]))

    # for all g in graphs, compute (d(g1, g) + d(g2, g)) / 2
    dis_median = [(dmatrix[i, row_g1] + dmatrix[i, row_g2]) / 2
                  for i in range(len(graphs))]
    print(np.sort(dis_median))
    print(np.argsort(dis_median))
-
-
def test_who_is_the_closest_in_GED_space(Gn):
    """Rank graphs by their graph edit distance to two reference graphs.

    A hand-made "median" candidate (a copy of ``Gn[6]`` with node ``0``
    removed) is drawn, printed and put in front of the dataset. ``GED`` is
    then evaluated for every ordered pair of graphs of that extended list.
    Sorted distances and the matching argsort indices are printed for the
    rows of the two reference graphs and for the mean of the distances to
    both of them.

    Parameters
    ----------
    Gn : list
        Graphs of the dataset. Indices 0 and 6 are used as reference graphs.
    """
    from iam import GED

    idx_gi = [0, 6]
    references = [Gn[i] for i in idx_gi]

    # Build the "median" candidate from the second reference graph.
    gnew = references[1].copy()
    gnew.remove_node(0)
    nx.draw_networkx(gnew)
    plt.show()
    print(gnew.nodes(data=True))

    # The candidate takes position 0, so dataset indices shift by one.
    Gn = [gnew] + Gn
    n_graphs = len(Gn)
    shifted_first = idx_gi[0] + 1
    shifted_second = idx_gi[1] + 1

    # Fill the full (ordered-pair) GED matrix.
    ged_matrix = np.zeros((n_graphs, n_graphs))
    for row in tqdm(range(n_graphs), desc='computing GEDs', file=sys.stdout):
        for col in range(n_graphs):
            distance, _, _ = GED(Gn[row], Gn[col], lib='gedlib')
            ged_matrix[row, col] = distance

    for shifted in (shifted_first, shifted_second):
        print(np.sort(ged_matrix[shifted]))
        print(np.argsort(ged_matrix[shifted]))

    # Mean of GED(g1, g) and GED(g2, g) for every graph g.
    dis_median = []
    for row in range(n_graphs):
        pair_sum = ged_matrix[row, shifted_first] + ged_matrix[row, shifted_second]
        dis_median.append(pair_sum / 2)
    print(np.sort(dis_median))
    print(np.argsort(dis_median))
    return
-
-
def test_will_IAM_give_the_median_graph_we_wanted(Gn):
    """Run IAM on copies of ``Gn[0]`` and ``Gn[6]`` and display the result.

    The two copies are passed both as first and as second argument of
    ``test_iam_with_more_graphs_as_init`` with all three costs set to 1.
    The returned graph is drawn and its nodes and edges are printed.

    Parameters
    ----------
    Gn : list
        Graphs of the dataset; only indices 0 and 6 are read.
    """
    # The original body called this routine without importing it, which
    # raised NameError.
    # NOTE(review): assumed to be defined in iam, next to the IAM routine
    # that test_iam_letter_h imports from there - TODO confirm.
    from iam import test_iam_with_more_graphs_as_init

    idx_gi = [0, 6]
    g1 = Gn[idx_gi[0]].copy()
    g2 = Gn[idx_gi[1]].copy()

    # NOTE(review): the result is used as a single graph below; confirm this
    # matches what the routine returns.
    g_median = test_iam_with_more_graphs_as_init([g1, g2], [g1, g2],
                                                 c_ei=1, c_er=1, c_es=1)
    nx.draw_networkx(g_median)
    plt.show()
    print(g_median.nodes(data=True))
    print(g_median.edges(data=True))
-
-
def test_new_IAM_allGraph_deleteNodes(Gn):
    """Run the node-deleting IAM variant on two small hand-built graphs.

    Both graphs have five nodes and the same four edges (all with
    ``bond_type`` '1'); they differ only in the ``atom`` label of nodes 3
    and 4 ('S' in the first graph, 'O' in the second). The inputs and every
    graph returned by IAM are drawn and their nodes and edges printed.

    Parameters
    ----------
    Gn : list
        Not used by the current experiment; kept so existing callers that
        pass the dataset keep working.
    """
    # The original body called this routine without importing it, which
    # raised NameError. Imported from iam as test_iam_letter_h does.
    from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations

    def show(graph):
        # Draw one graph and dump its labelled nodes and edges.
        nx.draw_networkx(graph)
        plt.show()
        print(graph.nodes(data=True))
        print(graph.edges(data=True))

    edges = [(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
             (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})]

    g1 = nx.Graph(name='haha')
    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
                       (3, {'atom': 'S'}), (4, {'atom': 'S'})])
    g1.add_edges_from(edges)

    g2 = nx.Graph(name='hahaha')
    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
                       (3, {'atom': 'O'}), (4, {'atom': 'O'})])
    g2.add_edges_from(edges)

    show(g1)
    show(g2)

    # test_iam_letter_h unpacks this routine as (list of graphs, distance);
    # the original code here drew the whole return value as if it were one
    # graph. Unpack it the same way and show every returned graph.
    ghat_list, _ = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
        [g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)
    for g_median in ghat_list:
        show(g_median)
-
-
def test_the_simple_two(Gn, gkernel):
    """Search pre-images of an equally weighted mix of ``Gn[0]`` and ``Gn[6]``.

    The kernel matrix is computed on copies of all dataset graphs followed
    by copies of the two target graphs, then ``gk_iam_nearest_multi`` is run
    once per alpha (a single value, 0.5). The best distance and the graphs
    returned for each alpha are printed and drawn.

    Parameters
    ----------
    Gn : list
        Graphs of the dataset; indices 0 and 6 are the two target graphs.
    gkernel : str
        Name of the graph kernel, forwarded to ``compute_kernel`` and
        ``gk_iam_nearest_multi``.
    """
    from gk_iam import gk_iam_nearest_multi, compute_kernel

    # Experiment settings.
    lmbda = 0.03  # termination probability (not used below)
    r_max = 10  # recursions
    l = 500  # (not used below)
    alpha_range = np.linspace(0.5, 0.5, 1)
    k = 2  # k nearest neighbors

    # The two target graphs are fixed rather than drawn at random.
    np.random.seed(1)
    idx_gi = [0, 6]
    g1, g2 = (Gn[i] for i in idx_gi)

    # Kernel matrix layout: dataset graphs first, the two targets last.
    Gn_mix = [g.copy() for g in Gn] + [g1.copy(), g2.copy()]
    km = compute_kernel(Gn_mix, gkernel, True)

    dis_best = []
    g_best = []
    for alpha in alpha_range:
        print('alpha =', alpha)
        n_data = len(Gn)
        dhat, ghat_list = gk_iam_nearest_multi(
            Gn, [g1, g2], [alpha, 1 - alpha], range(n_data, n_data + 2),
            km, k, r_max, gkernel)
        dis_best.append(dhat)
        g_best.append(ghat_list)

    # Report what was found for every alpha.
    for alpha, dhat, ghat_list in zip(alpha_range, dis_best, g_best):
        print('when alpha is', alpha, 'the shortest distance is', dhat)
        print('the corresponding pre-images are')
        for g in ghat_list:
            nx.draw_networkx(g)
            plt.show()
            print(g.nodes(data=True))
            print(g.edges(data=True))
-
-
def test_remove_bests(Gn, gkernel):
    """Search pre-images of ``Gn[0]`` and ``Gn[6]`` without them as candidates.

    Same experiment as ``test_the_simple_two`` except that the two target
    graphs are left out of the graph list handed to
    ``gk_iam_nearest_multi``, 20 nearest neighbors are used, and the
    resulting graphs are shown with ``draw_Letter_graph``.

    Parameters
    ----------
    Gn : list
        Graphs of the dataset; indices 0 and 6 are the two target graphs.
        The list is no longer modified (the previous implementation deleted
        both entries from the caller's list).
    gkernel : str
        Name of the graph kernel, forwarded to ``compute_kernel`` and
        ``gk_iam_nearest_multi``.
    """
    from gk_iam import gk_iam_nearest_multi, compute_kernel

    r_max = 10  # recursions
    alpha_range = np.linspace(0.5, 0.5, 1)
    k = 20  # k nearest neighbors

    # The two target graphs are fixed rather than drawn at random.
    np.random.seed(1)
    idx_gi = [0, 6]
    g1 = Gn[idx_gi[0]]
    g2 = Gn[idx_gi[1]]

    # Remove the best 2 graphs. Filtering into a new list drops the same
    # entries as the former `del Gn[0]; del Gn[5]` without touching the
    # list owned by the caller.
    excluded = set(idx_gi)
    candidates = [g for i, g in enumerate(Gn) if i not in excluded]

    # Kernel matrix layout: remaining graphs first, the two targets last.
    Gn_mix = [g.copy() for g in candidates]
    Gn_mix.append(g1.copy())
    Gn_mix.append(g2.copy())
    km = compute_kernel(Gn_mix, gkernel, True)

    g_best = []
    dis_best = []
    for alpha in alpha_range:
        print('alpha =', alpha)
        dhat, ghat_list = gk_iam_nearest_multi(
            candidates, [g1, g2], [alpha, 1 - alpha],
            range(len(candidates), len(candidates) + 2), km,
            k, r_max, gkernel)
        dis_best.append(dhat)
        g_best.append(ghat_list)

    for idx, item in enumerate(alpha_range):
        print('when alpha is', item, 'the shortest distance is', dis_best[idx])
        print('the corresponding pre-images are')
        for g in g_best[idx]:
            draw_Letter_graph(g)
            print(g.nodes(data=True))
            print(g.edges(data=True))
-
-
def test_gkiam_letter_h():
    """Run the kernel-based pre-image search for each class of Letter-high.

    The dataset is loaded and its graphs are grouped by class label. For
    each class the kernel matrix is computed on the class graphs followed
    by copies of them, ``gk_iam_nearest_multi`` is run with equal weights
    ``1 / len(class)``, and the returned graphs are drawn (saved under
    'results/gk_iam/') and printed. ``median_distance`` is then evaluated
    between the returned graphs and the class graphs. Recorded times cover
    the kernel computation and the search. All collected values are printed
    at the end.
    """
    from gk_iam import gk_iam_nearest_multi, compute_kernel
    from iam import median_distance

    ds = {'name': 'Letter-high',
          'dataset': '../datasets/Letter-high/Letter-high_A.txt',
          'extra_params': {}}  # node nsymb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    gkernel = 'structuralspkernel'

    lmbda = 0.03  # termination probability (not used below)
    r_max = 3  # recursions
    k = 10  # k nearest neighbors

    # Group graph indices by letter.
    idx_dict = get_same_item_indices(y_all)
    time_list, sod_list, sod_min_list = [], [], []
    for letter_indices in idx_dict.values():
        print('\n-------------------------------------------------------\n')
        Gn_let = [Gn[i].copy() for i in letter_indices]
        Gn_mix = Gn_let + [g.copy() for g in Gn_let]
        n_let = len(Gn_let)

        # A single alpha: every graph of the class gets the same weight.
        alpha_range = np.linspace(1 / n_let, 1 / n_let, 1)

        time0 = time.time()
        km = compute_kernel(Gn_mix, gkernel, True)
        g_best = []
        dis_best = []
        for alpha in alpha_range:
            print('alpha =', alpha)
            dhat, ghat_list = gk_iam_nearest_multi(
                Gn_let, Gn_let, [alpha] * n_let, range(n_let, len(Gn_mix)),
                km, k, r_max, gkernel, c_ei=1.7, c_er=1.7, c_es=1.7)
            dis_best.append(dhat)
            g_best.append(ghat_list)
        time_list.append(time.time() - time0)

        # Show the best graphs and save them to file.
        for alpha, dhat, ghat_list in zip(alpha_range, dis_best, g_best):
            print('when alpha is', alpha, 'the shortest distance is', dhat)
            print('the corresponding pre-images are')
            for g in ghat_list:
                draw_Letter_graph(g, savepath='results/gk_iam/')
                print(g.nodes(data=True))
                print(g.edges(data=True))

        # Sum of distances in graph space (alpha range not considered).
        sod_tmp, _ = median_distance(g_best[0], Gn_let)
        sod_list.append(sod_tmp)
        sod_min_list.append(np.min(sod_tmp))

    print('\nsods in graph space: ', sod_list)
    print('\nsmallest sod in graph space for each letter: ', sod_min_list)
    print('\ntimes:', time_list)
-
-
def get_same_item_indices(ls):
    """Group the positions of equal items of an iterable.

    Returns a dict that maps each distinct item to the list of indices at
    which it occurs, in increasing order. Keys appear in order of first
    occurrence. Items must be hashable.
    """
    positions = {}
    for position, value in enumerate(ls):
        positions.setdefault(value, []).append(position)
    return positions
-
-
- #def compute_letter_median_by_average(Gn):
- # return g_median
-
-
def test_iam_letter_h():
    """Run the IAM median search for each class of Letter-high.

    The dataset is loaded and its graphs are grouped by class label. For
    each class the node-deleting IAM variant is run on the class graphs and
    the returned graphs are drawn (saved under 'results/iam/') and printed.
    The kernel matrix ('structuralspkernel') is then computed on the
    returned graphs followed by the class graphs, and ``dis_gstar`` is
    evaluated for each returned graph against the class graphs with equal
    weights. Recorded times cover the IAM run only. All collected values
    are printed at the end.
    """
    from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
    from gk_iam import dis_gstar, compute_kernel

    ds = {'name': 'Letter-high',
          'dataset': '../datasets/Letter-high/Letter-high_A.txt',
          'extra_params': {}}  # node nsymb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

    lmbda = 0.03  # termination probability (not used below)

    # Group graph indices by letter.
    idx_dict = get_same_item_indices(y_all)
    time_list, sod_list, sod_min_list = [], [], []
    for letter_indices in idx_dict.values():
        Gn_let = [Gn[i].copy() for i in letter_indices]
        n_let = len(Gn_let)

        # A single alpha: every graph of the class gets the same weight.
        alpha_range = np.linspace(1 / n_let, 1 / n_let, 1)

        g_best = []
        dis_best = []
        time0 = time.time()
        for alpha in alpha_range:
            print('alpha =', alpha)
            ghat_list, dhat = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
                Gn_let, Gn_let, c_ei=1.7, c_er=1.7, c_es=1.7)
            dis_best.append(dhat)
            g_best.append(ghat_list)
        time_list.append(time.time() - time0)

        # Show the best graphs and save them to file.
        for alpha, dhat, ghat_list in zip(alpha_range, dis_best, g_best):
            print('when alpha is', alpha, 'the shortest distance is', dhat)
            print('the corresponding pre-images are')
            for g in ghat_list:
                draw_Letter_graph(g, savepath='results/iam/')
                print(g.nodes(data=True))
                print(g.edges(data=True))

        # Sum of distances in kernel space (alpha range not considered).
        # Kernel matrix layout: returned graphs first, class graphs last.
        gkernel = 'structuralspkernel'
        medians = g_best[0]
        Gn_mix = medians + Gn_let
        km = compute_kernel(Gn_mix, gkernel, True)
        sod_tmp = [
            dis_gstar(ig, range(len(medians), len(Gn_mix)),
                      [alpha_range[0]] * n_let, km, withterm3=False)
            for ig, _ in tqdm(enumerate(medians), desc='computing kernel sod',
                              file=sys.stdout)
        ]
        sod_list.append(sod_tmp)
        sod_min_list.append(np.min(sod_tmp))

    print('\nsods in kernel space: ', sod_list)
    print('\nsmallest sod in kernel space for each letter: ', sod_min_list)
    print('\ntimes:', time_list)
-
-
if __name__ == '__main__':
    # Dataset passed to the experiments that take ``Gn`` as an argument.
    # Alternatives that have been used here:
    #   MUTAG (node/edge symb):  '../datasets/MUTAG/MUTAG_A.txt'
    #   Acyclic:                 '../datasets/monoterpenoides/trainset_9.ds'
    #   Acyclic (node symb):     '../datasets/acyclic/dataset_bps.ds'
    ds = dict(name='Letter-high',
              dataset='../datasets/Letter-high/Letter-high_A.txt',
              extra_params={})  # node nsymb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    # Gn = Gn[0:20]

    # Experiments available in this file; enable the one to run.
    # test_new_IAM_allGraph_deleteNodes(Gn)
    # test_will_IAM_give_the_median_graph_we_wanted(Gn)
    # test_who_is_the_closest_in_GED_space(Gn)
    # test_who_is_the_closest_in_kernel_space(Gn)
    # test_the_simple_two(Gn, 'untilhpathkernel')
    # test_remove_bests(Gn, 'untilhpathkernel')
    # test_iam_letter_h()
    test_gkiam_letter_h()
|