You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

ged_data.py 8.2 kB

5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Wed Jun 17 15:05:01 2020
  5. @author: ljia
  6. """
  7. from gklearn.ged.env import Options, OptionsStringMap
  8. from gklearn.ged.edit_costs import Constant
  9. from gklearn.utils import SpecialLabel, dummy_node
  10. class GEDData(object):
  11. def __init__(self):
  12. self._graphs = []
  13. self._graph_names = []
  14. self._graph_classes = []
  15. self._num_graphs_without_shuffled_copies = 0
  16. self._strings_to_internal_node_ids = []
  17. self._internal_node_ids_to_strings = []
  18. self._edit_cost = None
  19. self._node_costs = None
  20. self._edge_costs = None
  21. self._node_labels = []
  22. self._edge_labels = []
  23. self._init_type = Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES
  24. self._delete_edit_cost = True
  25. self._max_num_nodes = 0
  26. self._max_num_edges = 0
  27. def num_graphs(self):
  28. """
  29. /*!
  30. * @brief Returns the number of graphs.
  31. * @return Number of graphs in the instance.
  32. */
  33. """
  34. return len(self._graphs)
  35. def graph(self, graph_id):
  36. """
  37. /*!
  38. * @brief Provides access to a graph.
  39. * @param[in] graph_id The ID of the graph.
  40. * @return Constant reference to the graph with ID @p graph_id.
  41. */
  42. """
  43. return self._graphs[graph_id]
  44. def shuffled_graph_copies_available(self):
  45. """
  46. /*!
  47. * @brief Checks if shuffled graph copies are available.
  48. * @return Boolean @p true if shuffled graph copies are available.
  49. */
  50. """
  51. return (self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES or self._init_type == Options.InitType.LAZY_WITH_SHUFFLED_COPIES)
  52. def num_graphs_without_shuffled_copies(self):
  53. """
  54. /*!
  55. * @brief Returns the number of graphs in the instance without the shuffled copies.
  56. * @return Number of graphs without shuffled copies contained in the instance.
  57. */
  58. """
  59. return self._num_graphs_without_shuffled_copies
  60. def node_cost(self, label1, label2):
  61. """
  62. /*!
  63. * @brief Returns node relabeling, insertion, or deletion cost.
  64. * @param[in] label1 First node label.
  65. * @param[in] label2 Second node label.
  66. * @return Node relabeling cost if @p label1 and @p label2 are both different from ged::dummy_label(),
  67. * node insertion cost if @p label1 equals ged::dummy_label and @p label2 does not,
  68. * node deletion cost if @p label1 does not equal ged::dummy_label and @p label2 does,
  69. * and 0 otherwise.
  70. */
  71. """
  72. if self._eager_init(): # @todo: check if correct
  73. return self._node_costs[label1, label2]
  74. if label1 == label2:
  75. return 0
  76. if label1 == SpecialLabel.DUMMY: # @todo: check dummy
  77. return self._edit_cost.node_ins_cost_fun(label2) # self._node_labels[label2 - 1]) # @todo: check
  78. if label2 == SpecialLabel.DUMMY: # @todo: check dummy
  79. return self._edit_cost.node_del_cost_fun(label1) # self._node_labels[label1 - 1])
  80. return self._edit_cost.node_rel_cost_fun(label1, label2) # self._node_labels[label1 - 1], self._node_labels[label2 - 1])
  81. def edge_cost(self, label1, label2):
  82. """
  83. /*!
  84. * @brief Returns edge relabeling, insertion, or deletion cost.
  85. * @param[in] label1 First edge label.
  86. * @param[in] label2 Second edge label.
  87. * @return Edge relabeling cost if @p label1 and @p label2 are both different from ged::dummy_label(),
  88. * edge insertion cost if @p label1 equals ged::dummy_label and @p label2 does not,
  89. * edge deletion cost if @p label1 does not equal ged::dummy_label and @p label2 does,
  90. * and 0 otherwise.
  91. */
  92. """
  93. if self._eager_init(): # @todo: check if correct
  94. return self._node_costs[label1, label2]
  95. if label1 == label2:
  96. return 0
  97. if label1 == SpecialLabel.DUMMY:
  98. return self._edit_cost.edge_ins_cost_fun(label2) # self._edge_labels[label2 - 1])
  99. if label2 == SpecialLabel.DUMMY:
  100. return self._edit_cost.edge_del_cost_fun(label1) # self._edge_labels[label1 - 1])
  101. return self._edit_cost.edge_rel_cost_fun(label1, label2) # self._edge_labels[label1 - 1], self._edge_labels[label2 - 1])
  102. def compute_induced_cost(self, g, h, node_map):
  103. """
  104. /*!
  105. * @brief Computes the edit cost between two graphs induced by a node map.
  106. * @param[in] g Input graph.
  107. * @param[in] h Input graph.
  108. * @param[in,out] node_map Node map whose induced edit cost is to be computed.
  109. */
  110. """
  111. cost = 0
  112. # collect node costs
  113. for node in g.nodes():
  114. image = node_map.image(node)
  115. label2 = (SpecialLabel.DUMMY if image == dummy_node() else h.nodes[image]['label'])
  116. cost += self.node_cost(g.nodes[node]['label'], label2)
  117. for node in h.nodes():
  118. pre_image = node_map.pre_image(node)
  119. if pre_image == dummy_node():
  120. cost += self.node_cost(SpecialLabel.DUMMY, h.nodes[node]['label'])
  121. # collect edge costs
  122. for (n1, n2) in g.edges():
  123. image1 = node_map.image(n1)
  124. image2 = node_map.image(n2)
  125. label2 = (h.edges[(image2, image1)]['label'] if h.has_edge(image2, image1) else SpecialLabel.DUMMY)
  126. cost += self.edge_cost(g.edges[(n1, n2)]['label'], label2)
  127. for (n1, n2) in h.edges():
  128. if not g.has_edge(node_map.pre_image(n2), node_map.pre_image(n1)):
  129. cost += self.edge_cost(SpecialLabel.DUMMY, h.edges[(n1, n2)]['label'])
  130. node_map.set_induced_cost(cost)
  131. def _set_edit_cost(self, edit_cost, edit_cost_constants):
  132. if self._delete_edit_cost:
  133. self._edit_cost = None
  134. if isinstance(edit_cost, str):
  135. edit_cost = OptionsStringMap.EditCosts[edit_cost]
  136. if edit_cost == Options.EditCosts.CHEM_1:
  137. if len(edit_cost_constants) == 4:
  138. self._edit_cost = CHEM1(edit_cost_constants[0], edit_cost_constants[1], edit_cost_constants[2], edit_cost_constants[3])
  139. elif len(edit_cost_constants) == 0:
  140. self._edit_cost = CHEM1()
  141. else:
  142. raise Exception('Wrong number of constants for selected edit costs Options::EditCosts::CHEM_1. Expected: 4 or 0; actual:', len(edit_cost_constants), '.')
  143. elif edit_cost == Options.EditCosts.LETTER:
  144. if len(edit_cost_constants) == 3:
  145. self._edit_cost = Letter(edit_cost_constants[0], edit_cost_constants[1], edit_cost_constants[2])
  146. elif len(edit_cost_constants) == 0:
  147. self._edit_cost = Letter()
  148. else:
  149. raise Exception('Wrong number of constants for selected edit costs Options::EditCosts::LETTER. Expected: 3 or 0; actual:', len(edit_cost_constants), '.')
  150. elif edit_cost == Options.EditCosts.LETTER2:
  151. if len(edit_cost_constants) == 5:
  152. self._edit_cost = Letter2(edit_cost_constants[0], edit_cost_constants[1], edit_cost_constants[2], edit_cost_constants[3], edit_cost_constants[4])
  153. elif len(edit_cost_constants) == 0:
  154. self._edit_cost = Letter2()
  155. else:
  156. raise Exception('Wrong number of constants for selected edit costs Options::EditCosts::LETTER2. Expected: 5 or 0; actual:', len(edit_cost_constants), '.')
  157. elif edit_cost == Options.EditCosts.NON_SYMBOLIC:
  158. if len(edit_cost_constants) == 6:
  159. self._edit_cost = NonSymbolic(edit_cost_constants[0], edit_cost_constants[1], edit_cost_constants[2], edit_cost_constants[3], edit_cost_constants[4], edit_cost_constants[5])
  160. elif len(edit_cost_constants) == 0:
  161. self._edit_cost = NonSymbolic()
  162. else:
  163. raise Exception('Wrong number of constants for selected edit costs Options::EditCosts::NON_SYMBOLIC. Expected: 6 or 0; actual:', len(edit_cost_constants), '.')
  164. elif edit_cost == Options.EditCosts.CONSTANT:
  165. if len(edit_cost_constants) == 6:
  166. self._edit_cost = Constant(edit_cost_constants[0], edit_cost_constants[1], edit_cost_constants[2], edit_cost_constants[3], edit_cost_constants[4], edit_cost_constants[5])
  167. elif len(edit_cost_constants) == 0:
  168. self._edit_cost = Constant()
  169. else:
  170. raise Exception('Wrong number of constants for selected edit costs Options::EditCosts::CONSTANT. Expected: 6 or 0; actual:', len(edit_cost_constants), '.')
  171. self._delete_edit_cost = True
  172. def _node_label_to_id(self, node_label):
  173. n_id = 0
  174. for n_l in self._node_labels:
  175. if n_l == node_label:
  176. return n_id + 1
  177. n_id += 1
  178. self._node_labels.append(node_label)
  179. return n_id + 1
  180. def _edge_label_to_id(self, edge_label):
  181. e_id = 0
  182. for e_l in self._edge_labels:
  183. if e_l == edge_label:
  184. return e_id + 1
  185. e_id += 1
  186. self._edge_labels.append(edge_label)
  187. return e_id + 1
  188. def _eager_init(self):
  189. return (self._init_type == Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES or self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES)

A Python package for graph kernels, graph edit distances and graph pre-image problem.