You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

graphdataset.py 7.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. """ Obtain all kinds of attributes of a graph dataset.
  2. """
  3. def get_dataset_attributes(Gn,
  4. target=None,
  5. attr_names=[],
  6. node_label=None,
  7. edge_label=None):
  8. import networkx as nx
  9. import numpy as np
  10. attrs = {}
  11. def get_dataset_size(Gn):
  12. return len(Gn)
  13. def get_all_node_num(Gn):
  14. return [nx.number_of_nodes(G) for G in Gn]
  15. def get_ave_node_num(all_node_num):
  16. return np.mean(all_node_num)
  17. def get_min_node_num(all_node_num):
  18. return np.amin(all_node_num)
  19. def get_max_node_num(all_node_num):
  20. return np.amax(all_node_num)
  21. def get_all_edge_num(Gn):
  22. return [nx.number_of_edges(G) for G in Gn]
  23. def get_ave_edge_num(all_edge_num):
  24. return np.mean(all_edge_num)
  25. def get_min_edge_num(all_edge_num):
  26. return np.amin(all_edge_num)
  27. def get_max_edge_num(all_edge_num):
  28. return np.amax(all_edge_num)
  29. def is_node_labeled(Gn):
  30. return False if node_label is None else True
  31. def get_node_label_num(Gn):
  32. nl = set()
  33. for G in Gn:
  34. nl = nl | set(nx.get_node_attributes(G, node_label).values())
  35. return len(nl)
  36. def is_edge_labeled(Gn):
  37. return False if edge_label is None else True
  38. def get_edge_label_num(Gn):
  39. nl = set()
  40. for G in Gn:
  41. nl = nl | set(nx.get_edge_attributes(G, edge_label).values())
  42. return len(nl)
  43. def is_directed(Gn):
  44. return nx.is_directed(Gn[0])
  45. def get_ave_node_degree(Gn):
  46. return np.mean([np.amax(list(dict(G.degree()).values())) for G in Gn])
  47. def get_max_node_degree(Gn):
  48. return np.amax([np.amax(list(dict(G.degree()).values())) for G in Gn])
  49. def get_min_node_degree(Gn):
  50. return np.amin([np.amax(list(dict(G.degree()).values())) for G in Gn])
  51. def get_substructures(Gn):
  52. subs = set()
  53. for G in Gn:
  54. degrees = list(dict(G.degree()).values())
  55. if any(i == 2 for i in degrees):
  56. subs.add('linear')
  57. if np.amax(degrees) >= 3:
  58. subs.add('non linear')
  59. if 'linear' in subs and 'non linear' in subs:
  60. break
  61. if is_directed(Gn):
  62. for G in Gn:
  63. if len(list(nx.find_cycle(G))) > 0:
  64. subs.add('cyclic')
  65. break
  66. # else:
  67. # # @todo: this method does not work for big graph with large amount of edges like D&D, try a better way.
  68. # upper = np.amin([nx.number_of_edges(G) for G in Gn]) * 2 + 10
  69. # for G in Gn:
  70. # if (nx.number_of_edges(G) < upper):
  71. # cyc = list(nx.simple_cycles(G.to_directed()))
  72. # if any(len(i) > 2 for i in cyc):
  73. # subs.add('cyclic')
  74. # break
  75. # if 'cyclic' not in subs:
  76. # for G in Gn:
  77. # cyc = list(nx.simple_cycles(G.to_directed()))
  78. # if any(len(i) > 2 for i in cyc):
  79. # subs.add('cyclic')
  80. # break
  81. return subs
  82. def get_class_num(target):
  83. return len(set(target))
  84. def get_node_attr_dim(Gn):
  85. for G in Gn:
  86. for n in G.nodes(data=True):
  87. if 'attributes' in n[1]:
  88. return len(n[1]['attributes'])
  89. return 0
  90. def get_edge_attr_dim(Gn):
  91. for G in Gn:
  92. if nx.number_of_edges(G) > 0:
  93. for e in G.edges(data=True):
  94. if 'attributes' in e[2]:
  95. return len(e[2]['attributes'])
  96. return 0
  97. if attr_names == []:
  98. attr_names = [
  99. 'substructures',
  100. 'node_labeled',
  101. 'edge_labeled',
  102. 'is_directed',
  103. 'dataset_size',
  104. 'ave_node_num',
  105. 'min_node_num',
  106. 'max_node_num',
  107. 'ave_edge_num',
  108. 'min_edge_num',
  109. 'max_edge_num',
  110. 'ave_node_degree',
  111. 'min_node_degree',
  112. 'max_node_degree',
  113. 'node_label_num',
  114. 'edge_label_num',
  115. 'node_attr_dim',
  116. 'edge_attr_dim',
  117. 'class_number',
  118. ]
  119. # dataset size
  120. if 'dataset_size' in attr_names:
  121. attrs.update({'dataset_size': get_dataset_size(Gn)})
  122. # graph node number
  123. if any(i in attr_names
  124. for i in ['ave_node_num', 'min_node_num', 'max_node_num']):
  125. all_node_num = get_all_node_num(Gn)
  126. if 'ave_node_num' in attr_names:
  127. attrs.update({'ave_node_num': get_ave_node_num(all_node_num)})
  128. if 'min_node_num' in attr_names:
  129. attrs.update({'min_node_num': get_min_node_num(all_node_num)})
  130. if 'max_node_num' in attr_names:
  131. attrs.update({'max_node_num': get_max_node_num(all_node_num)})
  132. # graph edge number
  133. if any(i in attr_names for i in
  134. ['ave_edge_num', 'min_edge_num', 'max_edge_num']):
  135. all_edge_num = get_all_edge_num(Gn)
  136. if 'ave_edge_num' in attr_names:
  137. attrs.update({'ave_edge_num': get_ave_edge_num(all_edge_num)})
  138. if 'max_edge_num' in attr_names:
  139. attrs.update({'max_edge_num': get_max_edge_num(all_edge_num)})
  140. if 'min_edge_num' in attr_names:
  141. attrs.update({'min_edge_num': get_min_edge_num(all_edge_num)})
  142. # label number
  143. if any(i in attr_names for i in ['node_labeled', 'node_label_num']):
  144. is_nl = is_node_labeled(Gn)
  145. node_label_num = get_node_label_num(Gn)
  146. if 'node_labeled' in attr_names:
  147. # graphs are considered node unlabeled if all nodes have the same label.
  148. attrs.update({'node_labeled': is_nl if node_label_num > 1 else False})
  149. if 'node_label_num' in attr_names:
  150. attrs.update({'node_label_num': node_label_num})
  151. if any(i in attr_names for i in ['edge_labeled', 'edge_label_num']):
  152. is_el = is_edge_labeled(Gn)
  153. edge_label_num = get_edge_label_num(Gn)
  154. if 'edge_labeled' in attr_names:
  155. # graphs are considered edge unlabeled if all edges have the same label.
  156. attrs.update({'edge_labeled': is_el if edge_label_num > 1 else False})
  157. if 'edge_label_num' in attr_names:
  158. attrs.update({'edge_label_num': edge_label_num})
  159. if 'is_directed' in attr_names:
  160. attrs.update({'is_directed': is_directed(Gn)})
  161. if 'ave_node_degree' in attr_names:
  162. attrs.update({'ave_node_degree': get_ave_node_degree(Gn)})
  163. if 'max_node_degree' in attr_names:
  164. attrs.update({'max_node_degree': get_max_node_degree(Gn)})
  165. if 'min_node_degree' in attr_names:
  166. attrs.update({'min_node_degree': get_min_node_degree(Gn)})
  167. if 'substructures' in attr_names:
  168. attrs.update({'substructures': get_substructures(Gn)})
  169. if 'class_number' in attr_names:
  170. attrs.update({'class_number': get_class_num(target)})
  171. if 'node_attr_dim' in attr_names:
  172. attrs['node_attr_dim'] = get_node_attr_dim(Gn)
  173. if 'edge_attr_dim' in attr_names:
  174. attrs['edge_attr_dim'] = get_edge_attr_dim(Gn)
  175. from collections import OrderedDict
  176. return OrderedDict(
  177. sorted(attrs.items(), key=lambda i: attr_names.index(i[0])))

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.