You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

graphdataset.py 7.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. """ Obtain all kinds of attributes of a graph dataset.
  2. """
  3. def get_dataset_attributes(Gn,
  4. target=None,
  5. attr_names=[],
  6. node_label=None,
  7. edge_label=None):
  8. import networkx as nx
  9. import numpy as np
  10. attrs = {}
  11. def get_dataset_size(Gn):
  12. return len(Gn)
  13. def get_all_graph_size(Gn):
  14. return [nx.number_of_nodes(G) for G in Gn]
  15. def get_ave_graph_size(all_graph_size):
  16. return np.mean(all_graph_size)
  17. def get_min_graph_size(all_graph_size):
  18. return np.amin(all_graph_size)
  19. def get_max_graph_size(Gn):
  20. return np.amax(all_graph_size)
  21. def get_all_graph_edge_num(Gn):
  22. return [nx.number_of_edges(G) for G in Gn]
  23. def get_ave_graph_edge_num(all_graph_edge_num):
  24. return np.mean(all_graph_edge_num)
  25. def get_min_graph_edge_num(all_graph_edge_num):
  26. return np.amin(all_graph_edge_num)
  27. def get_max_graph_edge_num(all_graph_edge_num):
  28. return np.amax(all_graph_edge_num)
  29. def is_node_labeled(Gn):
  30. return False if node_label is None else True
  31. def get_node_label_num(Gn):
  32. nl = set()
  33. for G in Gn:
  34. nl = nl | set(nx.get_node_attributes(G, node_label).values())
  35. return len(nl)
  36. def is_edge_labeled(Gn):
  37. return False if edge_label is None else True
  38. def get_edge_label_num(Gn):
  39. nl = set()
  40. for G in Gn:
  41. nl = nl | set(nx.get_edge_attributes(G, edge_label).values())
  42. return len(nl)
  43. def is_directed(Gn):
  44. return nx.is_directed(Gn[0])
  45. def get_ave_graph_degree(Gn):
  46. return np.mean([np.amax(list(dict(G.degree()).values())) for G in Gn])
  47. def get_max_graph_degree(Gn):
  48. return np.amax([np.amax(list(dict(G.degree()).values())) for G in Gn])
  49. def get_min_graph_degree(Gn):
  50. return np.amin([np.amax(list(dict(G.degree()).values())) for G in Gn])
  51. def get_substructures(Gn):
  52. subs = set()
  53. for G in Gn:
  54. degrees = list(dict(G.degree()).values())
  55. if any(i == 2 for i in degrees):
  56. subs.add('linear')
  57. if np.amax(degrees) >= 3:
  58. subs.add('non linear')
  59. if 'linear' in subs and 'non linear' in subs:
  60. break
  61. if is_directed(Gn):
  62. for G in Gn:
  63. if len(list(nx.find_cycle(G))) > 0:
  64. subs.add('cyclic')
  65. break
  66. # else:
  67. # # @todo: this method does not work for big graph with large amount of edges like D&D, try a better way.
  68. # upper = np.amin([nx.number_of_edges(G) for G in Gn]) * 2 + 10
  69. # for G in Gn:
  70. # if (nx.number_of_edges(G) < upper):
  71. # cyc = list(nx.simple_cycles(G.to_directed()))
  72. # if any(len(i) > 2 for i in cyc):
  73. # subs.add('cyclic')
  74. # break
  75. # if 'cyclic' not in subs:
  76. # for G in Gn:
  77. # cyc = list(nx.simple_cycles(G.to_directed()))
  78. # if any(len(i) > 2 for i in cyc):
  79. # subs.add('cyclic')
  80. # break
  81. return subs
  82. def get_class_num(target):
  83. return len(set(target))
  84. def get_node_attr_dim(Gn):
  85. attrs = Gn[0].nodes[0]
  86. if 'attributes' in attrs:
  87. return len(attrs['attributes'])
  88. else:
  89. return 0
  90. def get_edge_attr_dim(Gn):
  91. for G in Gn:
  92. if nx.number_of_edges(G) > 0:
  93. for e in G.edges(data=True):
  94. if 'attributes' in e[2]:
  95. return len(e[2]['attributes'])
  96. else:
  97. return 0
  98. return 0
  99. if attr_names == []:
  100. attr_names = [
  101. 'substructures',
  102. 'node_labeled',
  103. 'edge_labeled',
  104. 'is_directed',
  105. 'dataset_size',
  106. 'ave_graph_size',
  107. 'min_graph_size',
  108. 'max_graph_size',
  109. 'ave_graph_edge_num',
  110. 'min_graph_edge_num',
  111. 'max_graph_edge_num',
  112. 'ave_graph_degree',
  113. 'min_graph_degree',
  114. 'max_graph_degree',
  115. 'node_label_num',
  116. 'edge_label_num',
  117. 'node_attr_dim',
  118. 'edge_attr_dim',
  119. 'class_number',
  120. ]
  121. # dataset size
  122. if 'dataset_size' in attr_names:
  123. attrs.update({'dataset_size': get_dataset_size(Gn)})
  124. # graph size
  125. if any(i in attr_names
  126. for i in ['ave_graph_size', 'min_graph_size', 'max_graph_size']):
  127. all_graph_size = get_all_graph_size(Gn)
  128. if 'ave_graph_size' in attr_names:
  129. attrs.update({'ave_graph_size': get_ave_graph_size(all_graph_size)})
  130. if 'min_graph_size' in attr_names:
  131. attrs.update({'min_graph_size': get_min_graph_size(all_graph_size)})
  132. if 'max_graph_size' in attr_names:
  133. attrs.update({'max_graph_size': get_max_graph_size(all_graph_size)})
  134. # graph edge number
  135. if any(i in attr_names for i in
  136. ['ave_graph_edge_num', 'min_graph_edge_num', 'max_graph_edge_num']):
  137. all_graph_edge_num = get_all_graph_edge_num(Gn)
  138. if 'ave_graph_edge_num' in attr_names:
  139. attrs.update({
  140. 'ave_graph_edge_num':
  141. get_ave_graph_edge_num(all_graph_edge_num)
  142. })
  143. if 'max_graph_edge_num' in attr_names:
  144. attrs.update({
  145. 'max_graph_edge_num':
  146. get_max_graph_edge_num(all_graph_edge_num)
  147. })
  148. if 'min_graph_edge_num' in attr_names:
  149. attrs.update({
  150. 'min_graph_edge_num':
  151. get_min_graph_edge_num(all_graph_edge_num)
  152. })
  153. # label number
  154. if any(i in attr_names for i in ['node_labeled', 'node_label_num']):
  155. is_nl = is_node_labeled(Gn)
  156. node_label_num = get_node_label_num(Gn)
  157. if 'node_labeled' in attr_names:
  158. # graphs are considered node unlabeled if all nodes have the same label.
  159. attrs.update({'node_labeled': is_nl if node_label_num > 1 else False})
  160. if 'node_label_num' in attr_names:
  161. attrs.update({'node_label_num': node_label_num})
  162. if any(i in attr_names for i in ['edge_labeled', 'edge_label_num']):
  163. is_el = is_edge_labeled(Gn)
  164. edge_label_num = get_edge_label_num(Gn)
  165. if 'edge_labeled' in attr_names:
  166. # graphs are considered edge unlabeled if all edges have the same label.
  167. attrs.update({'edge_labeled': is_el if edge_label_num > 1 else False})
  168. if 'edge_label_num' in attr_names:
  169. attrs.update({'edge_label_num': edge_label_num})
  170. if 'is_directed' in attr_names:
  171. attrs.update({'is_directed': is_directed(Gn)})
  172. if 'ave_graph_degree' in attr_names:
  173. attrs.update({'ave_graph_degree': get_ave_graph_degree(Gn)})
  174. if 'max_graph_degree' in attr_names:
  175. attrs.update({'max_graph_degree': get_max_graph_degree(Gn)})
  176. if 'min_graph_degree' in attr_names:
  177. attrs.update({'min_graph_degree': get_min_graph_degree(Gn)})
  178. if 'substructures' in attr_names:
  179. attrs.update({'substructures': get_substructures(Gn)})
  180. if 'class_number' in attr_names:
  181. attrs.update({'class_number': get_class_num(target)})
  182. if 'node_attr_dim' in attr_names:
  183. attrs['node_attr_dim'] = get_node_attr_dim(Gn)
  184. if 'edge_attr_dim' in attr_names:
  185. attrs['edge_attr_dim'] = get_edge_attr_dim(Gn)
  186. from collections import OrderedDict
  187. return OrderedDict(
  188. sorted(attrs.items(), key=lambda i: attr_names.index(i[0])))

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.