
[exp] fcsp: add program for slurm and the exps for space complexity.

v0.2.x
jajupmochi committed 4 years ago — commit 62145db207
6 changed files with 1190 additions and 47 deletions

  1. +68  -33  gklearn/experiments/thesis/graph_kernels/fcsp/compare_fcsp.py
  2. +98   -0  gklearn/experiments/thesis/graph_kernels/fcsp/compare_fcsp_space.py
  3. +107 -14  gklearn/experiments/thesis/graph_kernels/fcsp/run_jobs_compare_fcsp.py
  4. +225  -0  gklearn/experiments/thesis/graph_kernels/fcsp/run_jobs_compare_fcsp_space.py
  5. +253  -0  gklearn/experiments/thesis/graph_kernels/fcsp/shortest_path.py
  6. +439  -0  gklearn/experiments/thesis/graph_kernels/fcsp/structural_sp.py

+68 -33 gklearn/experiments/thesis/graph_kernels/fcsp/compare_fcsp.py

@@ -10,6 +10,7 @@ This script compares the results with and without FCSP.
from gklearn.dataset import Dataset
from gklearn.utils import get_graph_kernel_by_name
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from gklearn.experiments import DATASET_ROOT
import functools
import os
import pickle
@@ -17,50 +18,77 @@ import sys
import logging


def run_all(fcsp):
save_dir = 'outputs/' + ('fscp' if fcsp == True else 'naive') + '/'
os.makedirs(save_dir, exist_ok=True)
# def run_all(fcsp):

# from sklearn.model_selection import ParameterGrid

# Dataset_List = ['Alkane_unlabeled', 'Alkane', 'Acyclic', 'MAO_lite', 'MAO',
# 'PAH_unlabeled', 'PAH', 'MUTAG', 'Monoterpens',
# 'Letter-high', 'Letter-med', 'Letter-low',
# 'ENZYMES', 'AIDS', 'NCI1', 'NCI109', 'DD',
# 'BZR', 'COX2', 'DHFR', 'PTC_FM', 'PTC_FR', 'PTC_MM', 'PTC_MR',
# 'Cuneiform', 'KKI', 'OHSU', 'Peking_1', 'SYNTHETICnew',
# 'Synthie', 'SYNTHETIC', 'Fingerprint', 'IMDB-BINARY',
# 'IMDB-MULTI', 'COIL-DEL', 'PROTEINS', 'PROTEINS_full',
# 'Mutagenicity', 'REDDIT-BINARY']

# Kernel_List = ['ShortestPath', 'StructuralSP']

# task_grid = ParameterGrid({'kernel': Kernel_List[:], 'dataset': Dataset_List[:]})

# for task in list(task_grid):

from sklearn.model_selection import ParameterGrid
# save_file_suffix = '.' + task['kernel'] + '.' + task['dataset']
# file_name = os.path.join(save_dir, 'run_time' + save_file_suffix + '.pkl')
# if not os.path.isfile(file_name):
# print()
# print((task['kernel'], task['dataset']))

Dataset_List = ['Alkane_unlabeled', 'Alkane', 'Acyclic', 'MAO_lite', 'MAO',
'PAH_unlabeled', 'PAH', 'MUTAG', 'Letter-high', 'Letter-med', 'Letter-low',
'ENZYMES', 'AIDS', 'NCI1', 'NCI109', 'DD',
'BZR', 'COX2', 'DHFR', 'PTC_FM', 'PTC_FR', 'PTC_MM', 'PTC_MR',
'Cuneiform', 'KKI', 'OHSU', 'Peking_1', 'SYNTHETICnew',
'Synthie', 'SYNTHETIC', 'Fingerprint', 'IMDB-BINARY',
'IMDB-MULTI', 'COIL-DEL', 'PROTEINS', 'PROTEINS_full',
'Mutagenicity', 'REDDIT-BINARY']
# try:
# gram_matrix, run_time = compute(task['kernel'], task['dataset'], fcsp)

Kernel_List = ['ShortestPath', 'StructuralSP']
# except Exception as exp:
# print('An exception occurred when running this experiment:')
# LOG_FILENAME = save_dir + 'error.txt'
# logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
# logging.exception('\n--------------' + save_file_suffix + '------------------')
# print(repr(exp))
# else:
# save_file_suffix = '.' + task['kernel'] + task['dataset']

work_grid = ParameterGrid({'kernel': Kernel_List[:], 'dataset': Dataset_List[:]})
# with open(file_name, 'wb') as f:
# pickle.dump(run_time, f)

for work in list(work_grid):

save_file_suffix = '.' + work['kernel'] + '.' + work['dataset']
file_name = os.path.join(save_dir, 'run_time' + save_file_suffix + '.pkl')
if not os.path.isfile(file_name):
print()
print((work['kernel'], work['dataset']))

try:
gram_matrix, run_time = run_work(work['kernel'], work['dataset'], fcsp)
except Exception as exp:
print('An exception occurred when running this experiment:')
LOG_FILENAME = save_dir + 'error.txt'
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
logging.exception(save_file_suffix)
print(repr(exp))
def run_task(kernel_name, ds_name, fcsp):
save_file_suffix = '.' + kernel_name + '.' + ds_name + '.' + str(fcsp)
file_name = os.path.join(save_dir, 'run_time' + save_file_suffix + '.pkl')

save_file_suffix = '.' + work['kernel'] + work['dataset']
if not os.path.isfile(file_name):
print()
print((kernel_name, ds_name, str(fcsp)))

try:
gram_matrix, run_time = compute(kernel_name, ds_name, fcsp)

except Exception as exp:
print('An exception occurred when running this experiment:')
LOG_FILENAME = os.path.join(save_dir, 'error' + save_file_suffix + '.txt')
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
logging.exception('\n--------------' + save_file_suffix + '------------------')
print(repr(exp))

else:
with open(file_name, 'wb') as f:
pickle.dump(run_time, f)


def run_work(kernel_name, ds_name, fcsp):
dataset = Dataset(ds_name, verbose=True)
def compute(kernel_name, ds_name, fcsp):
dataset = Dataset(ds_name, root=DATASET_ROOT, verbose=True)
if kernel_name == 'ShortestPath':
dataset.trim_dataset(edge_required=True)


mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
node_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
@@ -87,8 +115,15 @@ def run_work(kernel_name, ds_name, fcsp):

if __name__ == '__main__':
if len(sys.argv) > 1:
fcsp = True if sys.argv[1] == 'True' else False
kernel_name = sys.argv[1]
ds_name = sys.argv[2]
fcsp = True if sys.argv[3] == 'True' else False
else:
kernel_name = 'ShortestPath'
ds_name = 'Acyclic'
fcsp = True
run_all(fcsp)

save_dir = 'outputs/'
os.makedirs(save_dir, exist_ok=True)

run_task(kernel_name, ds_name, fcsp)
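
With this change each (kernel, dataset, fcsp) combination runs as a single task, so one experiment can be launched straight from the command line; a minimal sketch (the kernel and dataset names are only examples):

python3 compare_fcsp.py ShortestPath Acyclic True
# on success writes outputs/run_time.ShortestPath.Acyclic.True.pkl;
# on failure logs to outputs/error.ShortestPath.Acyclic.True.txt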

+98 -0 gklearn/experiments/thesis/graph_kernels/fcsp/compare_fcsp_space.py

@@ -0,0 +1,98 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 2 17:41:54 2020

@author: ljia

This script compares the results with and without FCSP.
"""
from gklearn.dataset import Dataset
from shortest_path import SPSpace
from structural_sp import SSPSpace
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from gklearn.experiments import DATASET_ROOT
import functools
import os
import pickle
import sys
import logging


def run_task(kernel_name, ds_name, fcsp):
save_file_suffix = '.' + kernel_name + '.' + ds_name + '.' + str(fcsp)
file_name = os.path.join(save_dir, 'space' + save_file_suffix + '.pkl')

# Return if the task is already completed.
if os.path.isfile(file_name):
with open(file_name, 'rb') as f:
data = pickle.load(f)
if data['completed']:
return

print()
print((kernel_name, ds_name, str(fcsp)))

try:
gram_matrix, run_time = compute(kernel_name, ds_name, fcsp, file_name)

except Exception as exp:
print('An exception occurred when running this experiment:')
LOG_FILENAME = os.path.join(save_dir, 'error.space' + save_file_suffix + '.txt')
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
logging.exception('\n--------------' + save_file_suffix + '------------------')
print(repr(exp))

# else:
# with open(file_name, 'wb') as f:
# pickle.dump(run_time, f)


def compute(kernel_name, ds_name, fcsp, file_name):
dataset = Dataset(ds_name, root=DATASET_ROOT, verbose=True)
if kernel_name == 'ShortestPath':
dataset.trim_dataset(edge_required=True)
# dataset.cut_graphs(range(0, 10))
kernel_class = SPSpace
else:
# dataset.cut_graphs(range(0, 10))
kernel_class = SSPSpace

mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
node_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
edge_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}

graph_kernel = kernel_class(name=kernel_name,
node_labels=dataset.node_labels,
edge_labels=dataset.edge_labels,
node_attrs=dataset.node_attrs,
edge_attrs=dataset.edge_attrs,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
fcsp=fcsp,
compute_method='naive',
node_kernels=node_kernels,
edge_kernels=edge_kernels,
file_name=file_name
)
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel=None,
normalize=False,
verbose=2
)
return gram_matrix, run_time


if __name__ == '__main__':
if len(sys.argv) > 1:
kernel_name = sys.argv[1]
ds_name = sys.argv[2]
fcsp = True if sys.argv[3] == 'True' else False
else:
kernel_name = 'StructuralSP'
ds_name = 'Fingerprint'
fcsp = True

save_dir = 'outputs/'
os.makedirs(save_dir, exist_ok=True)

run_task(kernel_name, ds_name, fcsp)
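
The space-complexity runs are resumable: compute() checkpoints partial results into the pickle and run_task() skips tasks whose 'completed' flag is set. A minimal sketch for inspecting a finished result (the file name follows the suffix convention above; the keys come from shortest_path.py / structural_sp.py):

import pickle

with open('outputs/space.StructuralSP.Fingerprint.True.pkl', 'rb') as f:
    data = pickle.load(f)

print(data['completed'])        # True once compute_stats() has run
print(data['nb_v_comparison'])  # mean number of vertex-kernel comparisons
print(data.get('vk_dict_mem'))  # mean estimated size of the FCSP node-kernel dict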

+107 -14 gklearn/experiments/thesis/graph_kernels/fcsp/run_jobs_compare_fcsp.py

@@ -10,27 +10,60 @@ import os
import re


def get_job_script(param):
OUT_TIME_LIST = [('ShortestPath', 'ENZYMES', 'False'),
('StructuralSP', 'ENZYMES', 'True'),
('StructuralSP', 'ENZYMES', 'False'),
('StructuralSP', 'AIDS', 'False'),
('ShortestPath', 'NCI1', 'False'),
('StructuralSP', 'NCI1', 'True'),
('StructuralSP', 'NCI1', 'False'),
('ShortestPath', 'NCI109', 'False'),
('StructuralSP', 'NCI109', 'True'),
('StructuralSP', 'NCI109', 'False'),
('ShortestPath', 'DD', 'True'),
('ShortestPath', 'DD', 'False'),
('StructuralSP', 'BZR', 'False'),
('ShortestPath', 'COX2', 'False'),
('StructuralSP', 'COX2', 'False'),
('ShortestPath', 'DHFR', 'False'),
]

OUT_MEM_LIST = [('StructuralSP', 'PROTEINS', 'True'),
('StructuralSP', 'PROTEINS', 'False'),
('StructuralSP', 'PROTEINS_full', 'True'),
('StructuralSP', 'PROTEINS_full', 'False'),
('ShortestPath', 'REDDIT-BINARY', 'True'),
]

MISS_LABEL_LIST = [('StructuralSP', 'GREC', 'True'),
('StructuralSP', 'GREC', 'False'),
('StructuralSP', 'Web', 'True'),
('StructuralSP', 'Web', 'False'),
]


def get_job_script(kernel, dataset, fcsp):
script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="fcsp.""" + param + r""""
#SBATCH --partition=long
#SBATCH --job-name="fcsp.""" + kernel + r"." + dataset + r"." + fcsp + r""""
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_fcsp.""" + param + r""".txt"
#SBATCH --error="errors/error_fcsp.""" + param + r""".txt"
#SBATCH --output="outputs/output_fcsp.""" + kernel + r"." + dataset + r"." + fcsp + r""".txt"
#SBATCH --error="errors/error_fcsp.""" + kernel + r"." + dataset + r"." + fcsp + r""".txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=100:00:00
#SBATCH --mem-per-cpu=4000
#SBATCH --time=300:00:00
##SBATCH --mem-per-cpu=4000
#SBATCH --mem=40000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/thesis/graph_kernels/fcsp
srun python3 compare_fcsp.py """ + param
srun python3 compare_fcsp.py """ + kernel + r" " + dataset + r" " + fcsp
script = script.strip()
script = re.sub('\n\t+', '\n', script)
script = re.sub('\n +', '\n', script)
@@ -38,15 +71,75 @@ srun python3 compare_fcsp.py """ + param
return script


def check_task_status(save_dir, *params):
str_task_id = '.' + '.'.join(params)

# Check if the task is in the out-of-memory, out-of-time, or missing-label lists.
if params in OUT_MEM_LIST or params in OUT_TIME_LIST or params in MISS_LABEL_LIST:
return True

# Check if the task is running or queued in SLURM.
command = 'squeue --user ljia02 --name "fcsp' + str_task_id + '" --format "%.2t" --noheader'
stream = os.popen(command)
output = stream.readlines()
if len(output) > 0:
return True

# Check if the results are already computed.
file_name = os.path.join(save_dir, 'run_time' + str_task_id + '.pkl')
if os.path.isfile(file_name):
return True

return False
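
# The squeue filter matches the exact job name built in get_job_script()
# ("fcsp.<kernel>.<dataset>.<fcsp>"), so a non-empty listing means this task
# is already running or queued and must not be submitted again.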


if __name__ == '__main__':
save_dir = 'outputs/'
os.makedirs(save_dir, exist_ok=True)
os.makedirs('outputs/', exist_ok=True)
os.makedirs('errors/', exist_ok=True)

param_list = ['True', 'False']
for param in param_list[:]:
job_script = get_job_script(param)
command = 'sbatch <<EOF\n' + job_script + '\nEOF'
# print(command)
os.system(command)
from sklearn.model_selection import ParameterGrid

Dataset_List = ['Alkane_unlabeled', 'Alkane', 'Acyclic', 'MAO_lite', 'MAO',
'PAH_unlabeled', 'PAH', 'MUTAG', 'Monoterpens',
'Letter-high', 'Letter-med', 'Letter-low',
'ENZYMES', 'AIDS', 'NCI1', 'NCI109', 'DD',
# new: not so large.
'PTC_FM', 'PTC_FR', 'PTC_MM', 'PTC_MR', 'Chiral', 'Vitamin_D',
'ACE', 'Steroid', 'KKI', 'Fingerprint', 'IMDB-BINARY',
'IMDB-MULTI', 'Peking_1', 'Cuneiform', 'OHSU', 'BZR', 'COX2',
'DHFR', 'SYNTHETICnew', 'Synthie', 'SYNTHETIC',
# new: large.
'TWITTER-Real-Graph-Partial', 'GREC', 'Web', 'MCF-7',
'MCF-7H', 'MOLT-4', 'MOLT-4H', 'NCI-H23', 'NCI-H23H',
'OVCAR-8', 'OVCAR-8H', 'P388', 'P388H', 'PC-3', 'PC-3H',
'SF-295', 'SF-295H', 'SN12C', 'SN12CH', 'SW-620', 'SW-620H',
'TRIANGLES', 'UACC257', 'UACC257H', 'Yeast', 'YeastH',
'COLORS-3', 'DBLP_v1', 'MSRC_9', 'MSRC_21', 'MSRC_21C',
'COLLAB', 'COIL-DEL',
'COIL-RAG', 'PROTEINS', 'PROTEINS_full', 'Mutagenicity',
'REDDIT-BINARY', 'FRANKENSTEIN', 'REDDIT-MULTI-5K',
'REDDIT-MULTI-12K']

Kernel_List = ['ShortestPath', 'StructuralSP']

fcsp_list = ['True', 'False']

task_grid = ParameterGrid({'kernel': Kernel_List[:],
'dataset': Dataset_List[:],
'fcsp': fcsp_list[:]})

from tqdm import tqdm

for task in tqdm(list(task_grid), desc='submitting tasks/jobs'):

if not check_task_status(save_dir, task['kernel'], task['dataset'], task['fcsp']):
job_script = get_job_script(task['kernel'], task['dataset'], task['fcsp'])
command = 'sbatch <<EOF\n' + job_script + '\nEOF'
# print(command)
os.system(command)
# os.popen(command)
# output = stream.readlines()
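
For reference, the job script generated for the task ('ShortestPath', 'MUTAG', 'True') would expand to roughly the following (a sketch instantiated from the template above):

#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="fcsp.ShortestPath.MUTAG.True"
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_fcsp.ShortestPath.MUTAG.True.txt"
#SBATCH --error="errors/error_fcsp.ShortestPath.MUTAG.True.txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=300:00:00
#SBATCH --mem=40000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/thesis/graph_kernels/fcsp
srun python3 compare_fcsp.py ShortestPath MUTAG True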

+225 -0 gklearn/experiments/thesis/graph_kernels/fcsp/run_jobs_compare_fcsp_space.py

@@ -0,0 +1,225 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 14 11:49:43 2020

@author: ljia
"""

import os
import re
import pickle


OUT_TIME_LIST = []


OUT_MEM_LIST = [('ShortestPath', 'REDDIT-BINARY', 'True'),
('ShortestPath', 'REDDIT-BINARY', 'False'),
('ShortestPath', 'DD', 'True'),
('ShortestPath', 'DD', 'False'),
('ShortestPath', 'MCF-7', 'True'),
('ShortestPath', 'MCF-7', 'False'),
('StructuralSP', 'MCF-7', 'True'),
('StructuralSP', 'MCF-7', 'False'),
('ShortestPath', 'MCF-7H', 'True'),
('ShortestPath', 'MCF-7H', 'False'),
('StructuralSP', 'MCF-7H', 'True'),
('StructuralSP', 'MCF-7H', 'False'),
('ShortestPath', 'MOLT-4', 'True'),
('ShortestPath', 'MOLT-4', 'False'),
('StructuralSP', 'MOLT-4', 'True'),
('StructuralSP', 'MOLT-4', 'False'),
('ShortestPath', 'MOLT-4H', 'True'),
('ShortestPath', 'MOLT-4H', 'False'),
('StructuralSP', 'MOLT-4H', 'True'),
('StructuralSP', 'MOLT-4H', 'False'),
('ShortestPath', 'P388', 'True'),
('ShortestPath', 'P388H', 'True'),
('ShortestPath', 'NCI-H23', 'True'),
('ShortestPath', 'NCI-H23', 'False'),
('StructuralSP', 'NCI-H23', 'True'),
('StructuralSP', 'NCI-H23', 'False'),
('ShortestPath', 'NCI-H23H', 'True'),
('ShortestPath', 'NCI-H23H', 'False'),
('StructuralSP', 'NCI-H23H', 'True'),
('StructuralSP', 'NCI-H23H', 'False'),
('ShortestPath', 'OVCAR-8', 'True'),
('ShortestPath', 'OVCAR-8', 'False'),
('StructuralSP', 'OVCAR-8', 'True'),
('StructuralSP', 'OVCAR-8', 'False'),
('ShortestPath', 'OVCAR-8H', 'False'),
('StructuralSP', 'OVCAR-8H', 'False'),
('ShortestPath', 'SN12C', 'True'),
('ShortestPath', 'SN12C', 'False'),
('StructuralSP', 'SN12C', 'True'),
('StructuralSP', 'SN12C', 'False'),
('ShortestPath', 'SN12CH', 'True'),
('ShortestPath', 'SN12CH', 'False'),
('ShortestPath', 'SF-295', 'True'),
('ShortestPath', 'SF-295', 'False'),
('StructuralSP', 'SF-295', 'True'),
('StructuralSP', 'SF-295', 'False'),
('ShortestPath', 'SF-295H', 'False'),
('StructuralSP', 'SF-295H', 'False'),
('ShortestPath', 'SW-620', 'True'),
('ShortestPath', 'SW-620', 'False'),
('StructuralSP', 'SW-620', 'True'),
('StructuralSP', 'SW-620', 'False'),
('ShortestPath', 'SW-620H', 'False'),
('StructuralSP', 'SW-620H', 'False'),
('ShortestPath', 'TRIANGLES', 'False'),
('StructuralSP', 'TRIANGLES', 'False'),
('ShortestPath', 'Yeast', 'True'),
('ShortestPath', 'Yeast', 'False'),
('StructuralSP', 'Yeast', 'True'),
('StructuralSP', 'Yeast', 'False'),
('ShortestPath', 'YeastH', 'True'),
('ShortestPath', 'FRANKENSTEIN', 'True'),
('ShortestPath', 'FRANKENSTEIN', 'False'),
('StructuralSP', 'FRANKENSTEIN', 'True'),
('StructuralSP', 'FRANKENSTEIN', 'False'),
('StructuralSP', 'SN12CH', 'True'),
('StructuralSP', 'SN12CH', 'False'),
('ShortestPath', 'UACC257', 'True'),
('ShortestPath', 'UACC257', 'False'),
('StructuralSP', 'UACC257', 'True'),
('StructuralSP', 'UACC257', 'False'),
('ShortestPath', 'UACC257H', 'True'),
('ShortestPath', 'UACC257H', 'False'),
('StructuralSP', 'UACC257H', 'True'),
('StructuralSP', 'UACC257H', 'False'),
('ShortestPath', 'PC-3', 'True'),
('ShortestPath', 'PC-3', 'False'),
('StructuralSP', 'PC-3', 'True'),
('StructuralSP', 'PC-3', 'False'),
('ShortestPath', 'PC-3H', 'True'),
('ShortestPath', 'PC-3H', 'False'),
('StructuralSP', 'PC-3H', 'True'),
('StructuralSP', 'PC-3H', 'False'),
('ShortestPath', 'DBLP_v1', 'False'),
('StructuralSP', 'DBLP_v1', 'True'),
('ShortestPath', 'REDDIT-BINARY', 'False'),
('ShortestPath', 'REDDIT-MULTI-12K', 'False'),
('StructuralSP', 'REDDIT-MULTI-12K', 'False'),
('ShortestPath', 'TWITTER-Real-Graph-Partial', 'True'),
('ShortestPath', 'TWITTER-Real-Graph-Partial', 'False'),
('StructuralSP', 'TWITTER-Real-Graph-Partial', 'True'),
('StructuralSP', 'TWITTER-Real-Graph-Partial', 'False'),
]

MISS_LABEL_LIST = [('StructuralSP', 'GREC', 'True'),
('StructuralSP', 'GREC', 'False'),
('StructuralSP', 'Web', 'True'),
('StructuralSP', 'Web', 'False'),
]


def get_job_script(kernel, dataset, fcsp):
# if (kernel, dataset, fcsp) in OUT_MEM_LIST:
# mem = '2560000'
# else:
mem = '4000'
script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="fcsp.space.""" + kernel + r"." + dataset + r"." + fcsp + r""""
#SBATCH --partition=""" + (r"court" if kernel == 'ShortestPath' else r"court") + r"""
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_fcsp.space.""" + kernel + r"." + dataset + r"." + fcsp + r""".txt"
#SBATCH --error="errors/error_fcsp.space.""" + kernel + r"." + dataset + r"." + fcsp + r""".txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=""" + (r"48" if kernel == 'ShortestPath' else r"48") + r""":00:00
##SBATCH --mem-per-cpu=""" + mem + r"""
#SBATCH --mem=4000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/thesis/graph_kernels/fcsp
srun python3 compare_fcsp_space.py """ + kernel + r" " + dataset + r" " + fcsp
script = script.strip()
script = re.sub('\n\t+', '\n', script)
script = re.sub('\n +', '\n', script)

return script


def check_task_status(save_dir, *params):
str_task_id = '.' + '.'.join(params)

# Check if the task is in the out-of-memory, out-of-time, or missing-label lists.
if params in OUT_MEM_LIST or params in OUT_TIME_LIST or params in MISS_LABEL_LIST:
return True

# Check if the task is running or queued in SLURM.
command = 'squeue --user ljia02 --name "fcsp.space' + str_task_id + '" --format "%.2t" --noheader'
stream = os.popen(command)
output = stream.readlines()
if len(output) > 0:
return True

# Check if the task is already computed.
file_name = os.path.join(save_dir, 'space' + str_task_id + '.pkl')
if os.path.isfile(file_name):
with open(file_name, 'rb') as f:
data = pickle.load(f)
if data['completed']:
return True

return False


if __name__ == '__main__':
save_dir = 'outputs/'
os.makedirs(save_dir, exist_ok=True)
os.makedirs('outputs/', exist_ok=True)
os.makedirs('errors/', exist_ok=True)

from sklearn.model_selection import ParameterGrid

Dataset_List = ['Alkane_unlabeled', 'Alkane', 'Acyclic', 'MAO_lite', 'MAO',
'PAH_unlabeled', 'PAH', 'MUTAG', 'Monoterpens',
'Letter-high', 'Letter-med', 'Letter-low',
'ENZYMES', 'AIDS', 'NCI1', 'NCI109', 'DD',
# new: not so large.
'PTC_FM', 'PTC_FR', 'PTC_MM', 'PTC_MR', 'Chiral', 'Vitamin_D',
'ACE', 'Steroid', 'KKI', 'Fingerprint', 'IMDB-BINARY',
'IMDB-MULTI', 'Peking_1', 'Cuneiform', 'OHSU', 'BZR', 'COX2',
'DHFR', 'SYNTHETICnew', 'Synthie', 'SYNTHETIC',
# new: large.
'TWITTER-Real-Graph-Partial', 'GREC', 'Web', 'MCF-7',
'MCF-7H', 'MOLT-4', 'MOLT-4H', 'NCI-H23', 'NCI-H23H',
'OVCAR-8', 'OVCAR-8H', 'P388', 'P388H', 'PC-3', 'PC-3H',
'SF-295', 'SF-295H', 'SN12C', 'SN12CH', 'SW-620', 'SW-620H',
'TRIANGLES', 'UACC257', 'UACC257H', 'Yeast', 'YeastH',
'COLORS-3', 'DBLP_v1', 'MSRC_9', 'MSRC_21', 'MSRC_21C',
'COLLAB', 'COIL-DEL',
'COIL-RAG', 'PROTEINS', 'PROTEINS_full', 'Mutagenicity',
'REDDIT-BINARY', 'FRANKENSTEIN', 'REDDIT-MULTI-5K',
'REDDIT-MULTI-12K']

Kernel_List = ['ShortestPath', 'StructuralSP']

fcsp_list = ['True', 'False']

task_grid = ParameterGrid({'kernel': Kernel_List[:],
'dataset': Dataset_List[:],
'fcsp': fcsp_list[:]})

from tqdm import tqdm

for task in tqdm(list(task_grid), desc='submitting tasks/jobs'):

if not check_task_status(save_dir, task['kernel'], task['dataset'], task['fcsp']):
job_script = get_job_script(task['kernel'], task['dataset'], task['fcsp'])
command = 'sbatch <<EOF\n' + job_script + '\nEOF'
# print(command)
os.system(command)
# os.popen(command)
# output = stream.readlines()

+253 -0 gklearn/experiments/thesis/graph_kernels/fcsp/shortest_path.py

@@ -0,0 +1,253 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 7 15:24:58 2020

@author: ljia

@references:

[1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data
Mining, Fifth IEEE International Conference on, 2005 Nov 27 (8 pp.). IEEE.
"""

import sys
from itertools import product
# from functools import partial
from gklearn.utils import get_iters
import numpy as np
from gklearn.utils.utils import getSPGraph
from gklearn.kernels import ShortestPath
import os
import pickle
from pympler import asizeof
import time
import networkx as nx


def load_results(file_name, fcsp):
if os.path.isfile(file_name):
with open(file_name, 'rb') as f:
return pickle.load(f)
else:
results = {'nb_comparison': [], 'i': -1, 'j': -1, 'completed': False}
if fcsp:
results['vk_dict_mem'] = []
return results


def save_results(file_name, results):
with open(file_name, 'wb') as f:
pickle.dump(results, f)


def estimate_vk_memory(obj, nb_nodes1, nb_nodes2):
# asizeof.asized(obj, detail=1).format()
# return asizeof.asizeof(obj)
key, val = next(iter(obj.items()))
# key = dict.iterkeys().next()
# key_mem = asizeof.asizeof(key)
dict_flat = sys.getsizeof(obj)
key_mem = 64

if isinstance(val, float):
val_mem = 24
mem = (key_mem + val_mem) * len(obj) + dict_flat + 28 * (nb_nodes1 + nb_nodes2)
else: # value is True or False
mem = (key_mem) * len(obj) + dict_flat + 52 + 28 * (nb_nodes1 + nb_nodes2)

# print(mem, asizeof.asizeof(obj), '\n', asizeof.asized(obj, detail=3).format(), '\n')
return mem
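
# Rough sanity check of the estimate above (a sketch; the byte constants assume
# 64-bit CPython, where a float object takes 24 bytes and a node-pair key plus
# its hash-table entry is budgeted at 64 bytes):
#
# vk = {(i, j): 1.0 for i in range(10) for j in range(10)}
# estimate_vk_memory(vk, 10, 10)  # = (64 + 24) * 100 + sys.getsizeof(vk) + 28 * 20
# asizeof.asizeof(vk)             # pympler's measured size, for comparison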


def compute_stats(file_name, results):
del results['i']
del results['j']
results['nb_comparison'] = np.mean(results['nb_comparison'])
results['completed'] = True
if 'vk_dict_mem' in results and len(results['vk_dict_mem']) > 0:
results['vk_dict_mem'] = np.mean(results['vk_dict_mem'])
save_results(file_name, results)


class SPSpace(ShortestPath):

def __init__(self, **kwargs):
super().__init__(**kwargs)
self._file_name = kwargs.get('file_name')

# @profile
def _compute_gm_series(self):
self._all_graphs_have_edges(self._graphs)
# get shortest path graph of each graph.
iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2))
self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]


results = load_results(self._file_name, self._fcsp)

# compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
iterator = get_iters(itr, desc='Computing kernels',
length=len_itr, file=sys.stdout, verbose=(self._verbose >= 2))

time0 = time.time()
for i, j in iterator:
if i > results['i'] or (i == results['i'] and j > results['j']):
data = self._sp_do_space(self._graphs[i], self._graphs[j])
if self._fcsp:
results['nb_comparison'].append(data[0])
if data[1] != {}:
results['vk_dict_mem'].append(estimate_vk_memory(data[1],
nx.number_of_nodes(self._graphs[i]),
nx.number_of_nodes(self._graphs[j])))
else:
results['nb_comparison'].append(data)
results['i'] = i
results['j'] = j

time1 = time.time()
if time1 - time0 > 600:
save_results(self._file_name, results)
time0 = time1

compute_stats(self._file_name, results)

return gram_matrix


def _sp_do_space(self, g1, g2):

if self._fcsp: # @todo: it may be put outside the _sp_do().
return self._sp_do_fcsp(g1, g2)
else:
return self._sp_do_naive(g1, g2)


def _sp_do_fcsp(self, g1, g2):

nb_comparison = 0

# compute shortest path matrices first, method borrowed from FCSP.
vk_dict = {} # shortest path matrices dict
if len(self._node_labels) > 0: # @todo: it may be put outside the _sp_do().
# node symb and non-symb labeled
if len(self._node_attrs) > 0:
kn = self._node_kernels['mix']
for n1, n2 in product(
g1.nodes(data=True), g2.nodes(data=True)):
n1_labels = [n1[1][nl] for nl in self._node_labels]
n2_labels = [n2[1][nl] for nl in self._node_labels]
n1_attrs = [n1[1][na] for na in self._node_attrs]
n2_attrs = [n2[1][na] for na in self._node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
nb_comparison += 1
# node symb labeled
else:
kn = self._node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_labels = [n1[1][nl] for nl in self._node_labels]
n2_labels = [n2[1][nl] for nl in self._node_labels]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
nb_comparison += 1
else:
# node non-symb labeled
if len(self._node_attrs) > 0:
kn = self._node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_attrs = [n1[1][na] for na in self._node_attrs]
n2_attrs = [n2[1][na] for na in self._node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
nb_comparison += 1
# node unlabeled
else:
for e1, e2 in product(
g1.edges(data=True), g2.edges(data=True)):
pass
# if e1[2]['cost'] == e2[2]['cost']:
# kernel += 1
# nb_comparison += 1

return nb_comparison, vk_dict

# # compute graph kernels
# if self._ds_infos['directed']:
# for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1], e2[1])]
# kn1 = nk11 * nk22
# kernel += kn1
# else:
# for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# # each edge walk is counted twice, starting from both its extreme nodes.
# nk11, nk12, nk21, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(
# e1[0], e2[1])], vk_dict[(e1[1], e2[0])], vk_dict[(e1[1], e2[1])]
# kn1 = nk11 * nk22
# kn2 = nk12 * nk21
# kernel += kn1 + kn2


def _sp_do_naive(self, g1, g2):

nb_comparison = 0

# Define the function to compute kernels between vertices in each condition.
if len(self._node_labels) > 0:
# node symb and non-symb labeled
if len(self._node_attrs) > 0:
def compute_vk(n1, n2):
kn = self._node_kernels['mix']
n1_labels = [g1.nodes[n1][nl] for nl in self._node_labels]
n2_labels = [g2.nodes[n2][nl] for nl in self._node_labels]
n1_attrs = [g1.nodes[n1][na] for na in self._node_attrs]
n2_attrs = [g2.nodes[n2][na] for na in self._node_attrs]
return kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
# node symb labeled
else:
def compute_vk(n1, n2):
kn = self._node_kernels['symb']
n1_labels = [g1.nodes[n1][nl] for nl in self._node_labels]
n2_labels = [g2.nodes[n2][nl] for nl in self._node_labels]
return kn(n1_labels, n2_labels)
else:
# node non-symb labeled
if len(self._node_attrs) > 0:
def compute_vk(n1, n2):
kn = self._node_kernels['nsymb']
n1_attrs = [g1.nodes[n1][na] for na in self._node_attrs]
n2_attrs = [g2.nodes[n2][na] for na in self._node_attrs]
return kn(n1_attrs, n2_attrs)
# node unlabeled
else:
# for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# kernel += 1
return 0

# compute graph kernels
if self._ds_infos['directed']:
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
# nk11, nk22 = compute_vk(e1[0], e2[0]), compute_vk(e1[1], e2[1])
# kn1 = nk11 * nk22
# kernel += kn1
nb_comparison += 2
else:
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
# each edge walk is counted twice, starting from both its extreme nodes.
# nk11, nk12, nk21, nk22 = compute_vk(e1[0], e2[0]), compute_vk(
# e1[0], e2[1]), compute_vk(e1[1], e2[0]), compute_vk(e1[1], e2[1])
# kn1 = nk11 * nk22
# kn2 = nk12 * nk21
# kernel += kn1 + kn2
nb_comparison += 4

return nb_comparison
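
# Note: the counters mirror the kernel computation left commented out above: in
# the naive variant each matching edge pair costs two vertex-kernel calls for
# directed graphs (nk11, nk22) and four for undirected ones (nk11, nk12, nk21,
# nk22), while the FCSP variant evaluates each vertex pair once into vk_dict,
# whose size is what estimate_vk_memory() approximates.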

+439 -0 gklearn/experiments/thesis/graph_kernels/fcsp/structural_sp.py

@@ -0,0 +1,439 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 30 11:59:57 2020

@author: ljia

@references:

[1] Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For
Measuring Similarity of Shapes. In ESANN 2007 Apr 25 (pp. 355-360).
"""
import sys
from itertools import product
from gklearn.utils import get_iters
import numpy as np
import time
import os, errno
import pickle
from pympler import asizeof
import networkx as nx
from gklearn.utils.utils import get_shortest_paths
from gklearn.kernels import StructuralSP


def load_splist(file_name):
if os.path.isfile(file_name):
with open(file_name, 'rb') as f:
return pickle.load(f)
else:
results_path = {'splist': [], 'i': -1, 'completed': False}
return results_path


def load_results(file_name, fcsp):
if os.path.isfile(file_name):
with open(file_name, 'rb') as f:
return pickle.load(f)
else:
results = {'nb_v_comparison': [], 'nb_e_comparison': [], 'i': -1, 'j': -1, 'completed': False}
if fcsp:
results['vk_dict_mem'] = []
results['ek_dict_mem'] = []
return results


def save_results(file_name, results):
with open(file_name, 'wb') as f:
pickle.dump(results, f)


def estimate_vk_memory(obj, nb_nodes1, nb_nodes2):
# asizeof.asized(obj, detail=1).format()
# return asizeof.asizeof(obj)
key, val = next(iter(obj.items()))
# key = dict.iterkeys().next()
# key_mem = asizeof.asizeof(key)
dict_flat = sys.getsizeof(obj)
key_mem = 64

if isinstance(val, float):
val_mem = 24
mem = (key_mem + val_mem) * len(obj) + dict_flat + 28 * (nb_nodes1 + nb_nodes2)
else: # value is True or False
mem = (key_mem) * len(obj) + dict_flat + 52 + 28 * (nb_nodes1 + nb_nodes2)

# print(mem, asizeof.asizeof(obj), '\n', asizeof.asized(obj, detail=3).format(), '\n')
return mem


def estimate_ek_memory(obj, nb_nodes1, nb_nodes2):
# asizeof.asized(obj, detail=1).format()
# return asizeof.asizeof(obj)
key, val = next(iter(obj.items()))
# key = dict.iterkeys().next()
# key_mem = asizeof.asizeof(key)
dict_flat = sys.getsizeof(obj)
key_mem = 192

if isinstance(val, float):
val_mem = 24
mem = (key_mem + val_mem) * len(obj) + dict_flat + 28 * (nb_nodes1 + nb_nodes2)
else: # value is True or False
mem = (key_mem) * len(obj) + dict_flat + 52 + 28 * (nb_nodes1 + nb_nodes2)

# print(mem, asizeof.asizeof(obj), '\n', asizeof.asized(obj, detail=3).format(), '\n')
return mem
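
# estimate_ek_memory() differs from estimate_vk_memory() only in key_mem: keys
# here are pairs of edge tuples, ((u, v), (x, y)), so each key is budgeted at
# 192 bytes instead of the 64 bytes assumed for a plain node-pair key.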


def compute_stats(file_name, results, splist):
del results['i']
del results['j']
results['nb_v_comparison'] = np.mean(results['nb_v_comparison'])
# if len(results['nb_e_comparison']) > 0:
results['nb_e_comparison'] = np.mean(results['nb_e_comparison'])
results['completed'] = True
if 'vk_dict_mem' in results and len(results['vk_dict_mem']) > 0:
results['vk_dict_mem'] = np.mean(results['vk_dict_mem'])
if 'ek_dict_mem' in results and len(results['ek_dict_mem']) > 0:
results['ek_dict_mem'] = np.mean(results['ek_dict_mem'])
results['nb_sp_ave'] = np.mean([len(ps) for ps in splist])
results['sp_len_ave'] = np.mean([np.mean([len(p) for p in ps]) for ps in splist])
results['sp_mem_all'] = asizeof.asizeof(splist)
save_results(file_name, results)


class SSPSpace(StructuralSP):

def __init__(self, **kwargs):
super().__init__(**kwargs)
self._file_name = kwargs.get('file_name')

# @profile
def _compute_gm_series(self):
# get the shortest paths of each graph in the dataset.
fn_paths = os.path.splitext(self._file_name)[0] + '.paths.pkl'
results_path = load_splist(fn_paths)

if not results_path['completed']:

iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2))
if self._compute_method == 'trie':
for g in iterator:
results_path['splist'].append(self._get_sps_as_trie(g))
else:
time0 = time.time()
for i, g in enumerate(iterator):
if i > results_path['i']:
results_path['splist'].append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))
results_path['i'] = i

time1 = time.time()
if time1 - time0 > 600:
save_results(fn_paths, results_path)
time0 = time1

del results_path['i']
results_path['completed'] = True
save_results(fn_paths, results_path)

#########
splist = results_path['splist']
results = load_results(self._file_name, self._fcsp)

# compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
length=len_itr, verbose=(self._verbose >= 2))
if self._compute_method == 'trie':
for i, j in iterator:
kernel = self._ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel
else:
time0 = time.time()
for i, j in iterator:
if i > results['i'] or (i == results['i'] and j > results['j']):
data = self._ssp_do_naive_space(self._graphs[i], self._graphs[j], splist[i], splist[j])
results['nb_v_comparison'].append(data[0])
results['nb_e_comparison'].append(data[1])
if self._fcsp:
if data[2] != {}:
results['vk_dict_mem'].append(estimate_vk_memory(data[2],
nx.number_of_nodes(self._graphs[i]),
nx.number_of_nodes(self._graphs[j])))
if data[3] != {}:
results['ek_dict_mem'].append(estimate_ek_memory(data[3],
nx.number_of_nodes(self._graphs[i]),
nx.number_of_nodes(self._graphs[j])))
results['i'] = i
results['j'] = j

time1 = time.time()
if time1 - time0 > 600:
save_results(self._file_name, results)
time0 = time1

compute_stats(self._file_name, results, splist)
# @todo: may not remove the path file if the program stops exactly here.
try:
os.remove(fn_paths)
except OSError as e:
if e.errno != errno.ENOENT:
raise

return gram_matrix


def _ssp_do_naive_space(self, g1, g2, spl1, spl2):
if self._fcsp: # @todo: it may be put outside the _sp_do().
return self._sp_do_naive_fcsp(g1, g2, spl1, spl2)
else:
return self._sp_do_naive_naive(g1, g2, spl1, spl2)


def _sp_do_naive_fcsp(self, g1, g2, spl1, spl2):

# First, compute shortest path matrices, method borrowed from FCSP.
vk_dict, nb_v_comparison = self._get_all_node_kernels(g1, g2)
# Then, compute kernels between all pairs of edges, which extends the idea
# of FCSP. This suits sparse graphs, which covers most of the cases we
# encountered; for dense graphs it would be slow.
ek_dict, nb_e_comparison = self._get_all_edge_kernels(g1, g2)

return nb_v_comparison, nb_e_comparison, vk_dict, ek_dict


def _sp_do_naive_naive(self, g1, g2, spl1, spl2):

nb_v_comparison = 0
nb_e_comparison = 0

# Define the function to compute kernels between vertices in each condition.
if len(self._node_labels) > 0:
# node symb and non-symb labeled
if len(self._node_attrs) > 0:
def compute_vk(n1, n2):
kn = self._node_kernels['mix']
n1_labels = [g1.nodes[n1][nl] for nl in self._node_labels]
n2_labels = [g2.nodes[n2][nl] for nl in self._node_labels]
n1_attrs = [g1.nodes[n1][na] for na in self._node_attrs]
n2_attrs = [g2.nodes[n2][na] for na in self._node_attrs]
return kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
# node symb labeled
else:
def compute_vk(n1, n2):
kn = self._node_kernels['symb']
n1_labels = [g1.nodes[n1][nl] for nl in self._node_labels]
n2_labels = [g2.nodes[n2][nl] for nl in self._node_labels]
return kn(n1_labels, n2_labels)
else:
# node non-symb labeled
if len(self._node_attrs) > 0:
def compute_vk(n1, n2):
kn = self._node_kernels['nsymb']
n1_attrs = [g1.nodes[n1][na] for na in self._node_attrs]
n2_attrs = [g2.nodes[n2][na] for na in self._node_attrs]
return kn(n1_attrs, n2_attrs)
# # node unlabeled
# else:
# for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# kernel += 1
# return kernel

# Define the function to compute kernels between edges in each condition.
if len(self._edge_labels) > 0:
# edge symb and non-symb labeled
if len(self._edge_attrs) > 0:
def compute_ek(e1, e2):
ke = self._edge_kernels['mix']
e1_labels = [g1.edges[e1][el] for el in self._edge_labels]
e2_labels = [g2.edges[e2][el] for el in self._edge_labels]
e1_attrs = [g1.edges[e1][ea] for ea in self._edge_attrs]
e2_attrs = [g2.edges[e2][ea] for ea in self._edge_attrs]
return ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
# edge symb labeled
else:
def compute_ek(e1, e2):
ke = self._edge_kernels['symb']
e1_labels = [g1.edges[e1][el] for el in self._edge_labels]
e2_labels = [g2.edges[e2][el] for el in self._edge_labels]
return ke(e1_labels, e2_labels)
else:
# edge non-symb labeled
if len(self._edge_attrs) > 0:
def compute_ek(e1, e2):
ke = self._edge_kernels['nsymb']
e1_attrs = [g1.edges[e1][ea] for ea in self._edge_attrs]
e2_attrs = [g2.edges[e2][ea] for ea in self._edge_attrs]
return ke(e1_attrs, e2_attrs)


# compute graph kernels
if len(self._node_labels) > 0 or len(self._node_attrs) > 0:
if len(self._edge_labels) > 0 or len(self._edge_attrs) > 0:
for p1, p2 in product(spl1, spl2):
if len(p1) == len(p2):
# nb_v_comparison = len(p1)
# nb_e_comparison = len(p1) - 1
kpath = compute_vk(p1[0], p2[0])
nb_v_comparison += 1
if kpath:
for idx in range(1, len(p1)):
kpath *= compute_vk(p1[idx], p2[idx]) * \
compute_ek((p1[idx-1], p1[idx]),
(p2[idx-1], p2[idx]))
nb_v_comparison += 1
nb_e_comparison += 1
if not kpath:
break
# kernel += kpath # add up kernels of all paths
else:
for p1, p2 in product(spl1, spl2):
if len(p1) == len(p2):
kpath = compute_vk(p1[0], p2[0])
nb_v_comparison += 1
if kpath:
for idx in range(1, len(p1)):
kpath *= compute_vk(p1[idx], p2[idx])
nb_v_comparison += 1
if not kpath:
break
# kernel += kpath # add up kernels of all paths
else:
if len(self._edge_labels) > 0 or len(self._edge_attrs) > 0:
for p1, p2 in product(spl1, spl2):
if len(p1) == len(p2):
if len(p1) == 0:
pass
else:
kpath = 1
for idx in range(0, len(p1) - 1):
kpath *= compute_ek((p1[idx], p1[idx+1]),
(p2[idx], p2[idx+1]))
nb_e_comparison += 1
if not kpath:
break
else:
pass
# for p1, p2 in product(spl1, spl2):
# if len(p1) == len(p2):
# kernel += 1
# try:
# kernel = kernel / (len(spl1) * len(spl2)) # Compute mean average
# except ZeroDivisionError:
# print(spl1, spl2)
# print(g1.nodes(data=True))
# print(g1.edges(data=True))
# raise Exception

return nb_v_comparison, nb_e_comparison


def _get_all_node_kernels(self, g1, g2):
nb_comparison = 0

vk_dict = {} # shortest path matrices dict
if len(self._node_labels) > 0:
# node symb and non-symb labeled
if len(self._node_attrs) > 0:
kn = self._node_kernels['mix']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_labels = [n1[1][nl] for nl in self._node_labels]
n2_labels = [n2[1][nl] for nl in self._node_labels]
n1_attrs = [n1[1][na] for na in self._node_attrs]
n2_attrs = [n2[1][na] for na in self._node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
nb_comparison += 1
# node symb labeled
else:
kn = self._node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_labels = [n1[1][nl] for nl in self._node_labels]
n2_labels = [n2[1][nl] for nl in self._node_labels]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
nb_comparison += 1
else:
# node non-symb labeled
if len(self._node_attrs) > 0:
kn = self._node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_attrs = [n1[1][na] for na in self._node_attrs]
n2_attrs = [n2[1][na] for na in self._node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
nb_comparison += 1
# node unlabeled
else:
pass # @todo: add edge weights.
# for e1 in g1.edges(data=True):
# for e2 in g2.edges(data=True):
# if e1[2]['cost'] == e2[2]['cost']:
# kernel += 1
# return kernel

return vk_dict, nb_comparison


def _get_all_edge_kernels(self, g1, g2):
nb_comparison = 0

# compute kernels between all pairs of edges, which extends the idea of
# FCSP. This suits sparse graphs, which covers most of the cases we
# encountered; for dense graphs it would be slow.
ek_dict = {} # dict of edge kernels
if len(self._edge_labels) > 0:
# edge symb and non-symb labeled
if len(self._edge_attrs) > 0:
ke = self._edge_kernels['mix']
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
e1_labels = [e1[2][el] for el in self._edge_labels]
e2_labels = [e2[2][el] for el in self._edge_labels]
e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
ek_temp = ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
nb_comparison += 1
# edge symb labeled
else:
ke = self._edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
e1_labels = [e1[2][el] for el in self._edge_labels]
e2_labels = [e2[2][el] for el in self._edge_labels]
ek_temp = ke(e1_labels, e2_labels)
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
nb_comparison += 1
else:
# edge non-symb labeled
if len(self._edge_attrs) > 0:
ke = self._edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
ek_temp = ke(e1_attrs, e2_attrs)
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
nb_comparison += 1
# edge unlabeled
else:
pass

return ek_dict, nb_comparison
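
# Note: every undirected edge-pair kernel is stored under all four endpoint
# orderings, so a lookup such as ek_dict[((u, v), (x, y))] succeeds whichever
# direction the shortest paths traverse the edges, while nb_comparison counts
# each kernel evaluation only once per edge pair.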
