You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

median_preimege_generator.py 4.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. # -*- coding: utf-8 -*-
  2. """example_median_preimege_generator.ipynb
  3. Automatically generated by Colaboratory.
  4. Original file is located at
  5. https://colab.research.google.com/drive/1PIDvHOcmiLEQ5Np3bgBDdu0kLOquOMQK
  6. **This script demonstrates how to generate a graph preimage using Boria's method.**
  7. ---
  8. """
  9. """**1. Get dataset.**"""
  10. from gklearn.utils import Dataset, split_dataset_by_target
  11. # Predefined dataset name, use dataset "MAO".
  12. ds_name = 'MAO'
  13. # The node/edge labels that will not be used in the computation.
  14. irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
  15. # Initialize a Dataset.
  16. dataset_all = Dataset()
  17. # Load predefined dataset "MAO".
  18. dataset_all.load_predefined_dataset(ds_name)
  19. # Remove irrelevant labels.
  20. dataset_all.remove_labels(**irrelevant_labels)
  21. # Split the whole dataset according to the classification targets.
  22. datasets = split_dataset_by_target(dataset_all)
  23. # Get the first class of graphs, whose median preimage will be computed.
  24. dataset = datasets[0]
  25. len(dataset.graphs)
  26. """**2. Set parameters.**"""
  27. import multiprocessing
  28. # Parameters for MedianPreimageGenerator (our method).
  29. mpg_options = {'fit_method': 'k-graphs', # how to fit edit costs. "k-graphs" means use all graphs in median set when fitting.
  30. 'init_ecc': [4, 4, 2, 1, 1, 1], # initial edit costs.
  31. 'ds_name': ds_name, # name of the dataset.
  32. 'parallel': True, # whether the parallel scheme is to be used.
  33. 'time_limit_in_sec': 0, # maximum time limit to compute the preimage. If set to 0 then no limit.
  34. 'max_itrs': 100, # maximum iteration limit to optimize edit costs. If set to 0 then no limit.
  35. 'max_itrs_without_update': 3, # If the times that edit costs is not update is more than this number, then the optimization stops.
  36. 'epsilon_residual': 0.01, # In optimization, the residual is only considered changed if the change is bigger than this number.
  37. 'epsilon_ec': 0.1, # In optimization, the edit costs are only considered changed if the changes are bigger than this number.
  38. 'verbose': 2 # whether to print out results.
  39. }
  40. # Parameters for graph kernel computation.
  41. kernel_options = {'name': 'PathUpToH', # use path kernel up to length h.
  42. 'depth': 9,
  43. 'k_func': 'MinMax',
  44. 'compute_method': 'trie',
  45. 'parallel': 'imap_unordered', # or None
  46. 'n_jobs': multiprocessing.cpu_count(),
  47. 'normalize': True, # whether to use normalized Gram matrix to optimize edit costs.
  48. 'verbose': 2 # whether to print out results.
  49. }
  50. # Parameters for GED computation.
  51. ged_options = {'method': 'IPFP', # use IPFP huristic.
  52. 'initialization_method': 'RANDOM', # or 'NODE', etc.
  53. 'initial_solutions': 10, # when bigger than 1, then the method is considered mIPFP.
  54. 'edit_cost': 'CONSTANT', # use CONSTANT cost.
  55. 'attr_distance': 'euclidean', # the distance between non-symbolic node/edge labels is computed by euclidean distance.
  56. 'ratio_runs_from_initial_solutions': 1,
  57. 'threads': multiprocessing.cpu_count(), # parallel threads. Do not work if mpg_options['parallel'] = False.
  58. 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
  59. }
  60. # Parameters for MedianGraphEstimator (Boria's method).
  61. mge_options = {'init_type': 'MEDOID', # how to initial median (compute set-median). "MEDOID" is to use the graph with smallest SOD.
  62. 'random_inits': 10, # number of random initialization when 'init_type' = 'RANDOM'.
  63. 'time_limit': 600, # maximum time limit to compute the generalized median. If set to 0 then no limit.
  64. 'verbose': 2, # whether to print out results.
  65. 'refine': False # whether to refine the final SODs or not.
  66. }
  67. print('done.')
  68. """**3. Run median preimage generator.**"""
  69. from gklearn.preimage import MedianPreimageGenerator
  70. # Create median preimage generator instance.
  71. mpg = MedianPreimageGenerator()
  72. # Add dataset.
  73. mpg.dataset = dataset
  74. # Set parameters.
  75. mpg.set_options(**mpg_options.copy())
  76. mpg.kernel_options = kernel_options.copy()
  77. mpg.ged_options = ged_options.copy()
  78. mpg.mge_options = mge_options.copy()
  79. # Run.
  80. mpg.run()
  81. """**4. Get results.**"""
  82. # Get results.
  83. import pprint
  84. pp = pprint.PrettyPrinter(indent=4) # pretty print
  85. results = mpg.get_results()
  86. pp.pprint(results)
  87. # Draw generated graphs.
  88. def draw_graph(graph):
  89. import matplotlib.pyplot as plt
  90. import networkx as nx
  91. plt.figure()
  92. pos = nx.spring_layout(graph)
  93. nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'atom_symbol'), font_color='w', width=3, with_labels=True)
  94. plt.show()
  95. plt.clf()
  96. plt.close()
  97. draw_graph(mpg.set_median)
  98. draw_graph(mpg.gen_median)

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.