|
@@ -29,15 +29,15 @@ def treeletkernel(*args, |
|
|
n_jobs=None, |
|
|
n_jobs=None, |
|
|
chunksize=None, |
|
|
chunksize=None, |
|
|
verbose=True): |
|
|
verbose=True): |
|
|
"""Calculate treelet graph kernels between graphs. |
|
|
|
|
|
|
|
|
"""Compute treelet graph kernels between graphs. |
|
|
|
|
|
|
|
|
Parameters |
|
|
Parameters |
|
|
---------- |
|
|
---------- |
|
|
Gn : List of NetworkX graph |
|
|
Gn : List of NetworkX graph |
|
|
List of graphs between which the kernels are calculated. |
|
|
|
|
|
|
|
|
List of graphs between which the kernels are computed. |
|
|
|
|
|
|
|
|
G1, G2 : NetworkX graphs |
|
|
G1, G2 : NetworkX graphs |
|
|
Two graphs between which the kernel is calculated. |
|
|
|
|
|
|
|
|
Two graphs between which the kernel is computed. |
|
|
|
|
|
|
|
|
sub_kernel : function |
|
|
sub_kernel : function |
|
|
The sub-kernel between 2 real number vectors. Each vector counts the |
|
|
The sub-kernel between 2 real number vectors. Each vector counts the |
|
@@ -89,7 +89,7 @@ def treeletkernel(*args, |
|
|
|
|
|
|
|
|
# ---- use pool.imap_unordered to parallel and track progress. ---- |
|
|
# ---- use pool.imap_unordered to parallel and track progress. ---- |
|
|
if parallel == 'imap_unordered': |
|
|
if parallel == 'imap_unordered': |
|
|
# get all canonical keys of all graphs before calculating kernels to save |
|
|
|
|
|
|
|
|
# get all canonical keys of all graphs before computing kernels to save |
|
|
# time, but this may cost a lot of memory for large dataset. |
|
|
# time, but this may cost a lot of memory for large dataset. |
|
|
pool = Pool(n_jobs) |
|
|
pool = Pool(n_jobs) |
|
|
itr = zip(Gn, range(0, len(Gn))) |
|
|
itr = zip(Gn, range(0, len(Gn))) |
|
@@ -120,8 +120,8 @@ def treeletkernel(*args, |
|
|
glbv=(canonkeys,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) |
|
|
glbv=(canonkeys,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) |
|
|
|
|
|
|
|
|
# ---- do not use parallelization. ---- |
|
|
# ---- do not use parallelization. ---- |
|
|
elif parallel == None: |
|
|
|
|
|
# get all canonical keys of all graphs before calculating kernels to save |
|
|
|
|
|
|
|
|
elif parallel is None: |
|
|
|
|
|
# get all canonical keys of all graphs before computing kernels to save |
|
|
# time, but this may cost a lot of memory for large dataset. |
|
|
# time, but this may cost a lot of memory for large dataset. |
|
|
canonkeys = [] |
|
|
canonkeys = [] |
|
|
for g in (tqdm(Gn, desc='getting canonkeys', file=sys.stdout) if verbose else Gn): |
|
|
for g in (tqdm(Gn, desc='getting canonkeys', file=sys.stdout) if verbose else Gn): |
|
@@ -148,7 +148,7 @@ def treeletkernel(*args, |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _treeletkernel_do(canonkey1, canonkey2, sub_kernel): |
|
|
def _treeletkernel_do(canonkey1, canonkey2, sub_kernel): |
|
|
"""Calculate treelet graph kernel between 2 graphs. |
|
|
|
|
|
|
|
|
"""Compute treelet graph kernel between 2 graphs. |
|
|
|
|
|
|
|
|
Parameters |
|
|
Parameters |
|
|
---------- |
|
|
---------- |
|
@@ -210,7 +210,7 @@ def get_canonkeys(G, node_label, edge_label, labeled, is_directed): |
|
|
|
|
|
|
|
|
# n-star patterns |
|
|
# n-star patterns |
|
|
patterns['3star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3] |
|
|
patterns['3star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3] |
|
|
patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4] |
|
|
|
|
|
|
|
|
patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4] # @todo: check self loop. |
|
|
patterns['5star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5] |
|
|
patterns['5star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5] |
|
|
# n-star patterns |
|
|
# n-star patterns |
|
|
canonkey['6'] = len(patterns['3star']) |
|
|
canonkey['6'] = len(patterns['3star']) |
|
|