You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

graphfiles.html 121 kB

5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811
  1. <!DOCTYPE html>
  2. <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
  3. <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
  4. <head>
  5. <meta charset="utf-8">
  6. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  7. <title>pygraph.utils.graphfiles &mdash; py-graph documentation</title>
  8. <script type="text/javascript" src="../../../_static/js/modernizr.min.js"></script>
  9. <script type="text/javascript" id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
  10. <script type="text/javascript" src="../../../_static/jquery.js"></script>
  11. <script type="text/javascript" src="../../../_static/underscore.js"></script>
  12. <script type="text/javascript" src="../../../_static/doctools.js"></script>
  13. <script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
  14. <script type="text/javascript" src="../../../_static/js/theme.js"></script>
  15. <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
  16. <link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
  17. <link rel="index" title="Index" href="../../../genindex.html" />
  18. <link rel="search" title="Search" href="../../../search.html" />
  19. </head>
  20. <body class="wy-body-for-nav">
  21. <div class="wy-grid-for-nav">
  22. <nav data-toggle="wy-nav-shift" class="wy-nav-side">
  23. <div class="wy-side-scroll">
  24. <div class="wy-side-nav-search" >
  25. <a href="../../../index.html" class="icon icon-home"> py-graph
  26. </a>
  27. <div class="version">
  28. 1.0
  29. </div>
  30. <div role="search">
  31. <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
  32. <input type="text" name="q" placeholder="Search docs" />
  33. <input type="hidden" name="check_keywords" value="yes" />
  34. <input type="hidden" name="area" value="default" />
  35. </form>
  36. </div>
  37. </div>
  38. <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
  39. <!-- Local TOC -->
  40. <div class="local-toc"></div>
  41. </div>
  42. </div>
  43. </nav>
  44. <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
  45. <nav class="wy-nav-top" aria-label="top navigation">
  46. <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
  47. <a href="../../../index.html">py-graph</a>
  48. </nav>
  49. <div class="wy-nav-content">
  50. <div class="rst-content">
  51. <div role="navigation" aria-label="breadcrumbs navigation">
  52. <ul class="wy-breadcrumbs">
  53. <li><a href="../../../index.html">Docs</a> &raquo;</li>
  54. <li><a href="../../index.html">Module code</a> &raquo;</li>
  55. <li>pygraph.utils.graphfiles</li>
  56. <li class="wy-breadcrumbs-aside">
  57. </li>
  58. </ul>
  59. <hr/>
  60. </div>
  61. <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
  62. <div itemprop="articleBody">
  63. <h1>Source code for pygraph.utils.graphfiles</h1><div class="highlight"><pre>
  64. <span></span><span class="sd">&quot;&quot;&quot; Utility functions to manage graph files</span>
  65. <span class="sd">&quot;&quot;&quot;</span>
  66. <span class="kn">from</span> <span class="nn">os.path</span> <span class="k">import</span> <span class="n">dirname</span><span class="p">,</span> <span class="n">splitext</span>
  67. <div class="viewcode-block" id="loadCT"><a class="viewcode-back" href="../../../pygraph.utils.html#pygraph.utils.graphfiles.loadCT">[docs]</a><span class="k">def</span> <span class="nf">loadCT</span><span class="p">(</span><span class="n">filename</span><span class="p">):</span>
  68. <span class="sd">&quot;&quot;&quot;load data from a Chemical Table (.ct) file.</span>
  69. <span class="sd"> Notes</span>
  70. <span class="sd"> ------</span>
  71. <span class="sd"> a typical example of data in .ct is like this:</span>
  72. <span class="sd"> 3 2 &lt;- number of nodes and edges</span>
  73. <span class="sd"> 0.0000 0.0000 0.0000 C &lt;- each line describes a node (x,y,z + label)</span>
  74. <span class="sd"> 0.0000 0.0000 0.0000 C</span>
  75. <span class="sd"> 0.0000 0.0000 0.0000 O</span>
  76. <span class="sd"> 1 3 1 1 &lt;- each line describes an edge : to, from, bond type, bond stereo</span>
  77. <span class="sd"> 2 3 1 1</span>
  78. <span class="sd"> </span>
  79. <span class="sd"> Check https://www.daylight.com/meetings/mug05/Kappler/ctfile.pdf</span>
  80. <span class="sd"> for detailed format description.</span>
  81. <span class="sd"> &quot;&quot;&quot;</span>
  82. <span class="kn">import</span> <span class="nn">networkx</span> <span class="k">as</span> <span class="nn">nx</span>
  83. <span class="kn">from</span> <span class="nn">os.path</span> <span class="k">import</span> <span class="n">basename</span>
  84. <span class="n">g</span> <span class="o">=</span> <span class="n">nx</span><span class="o">.</span><span class="n">Graph</span><span class="p">()</span>
  85. <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
  86. <span class="n">content</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">splitlines</span><span class="p">()</span>
  87. <span class="n">g</span> <span class="o">=</span> <span class="n">nx</span><span class="o">.</span><span class="n">Graph</span><span class="p">(</span>
  88. <span class="n">name</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">content</span><span class="p">[</span><span class="mi">0</span><span class="p">]),</span>
  89. <span class="n">filename</span> <span class="o">=</span> <span class="n">basename</span><span class="p">(</span><span class="n">filename</span><span class="p">))</span> <span class="c1"># set name of the graph</span>
  90. <span class="n">tmp</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">)</span>
  91. <span class="k">if</span> <span class="n">tmp</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s1">&#39;&#39;</span><span class="p">:</span>
  92. <span class="n">nb_nodes</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> <span class="c1"># number of the nodes</span>
  93. <span class="n">nb_edges</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span> <span class="c1"># number of the edges</span>
  94. <span class="k">else</span><span class="p">:</span>
  95. <span class="n">nb_nodes</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
  96. <span class="n">nb_edges</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
  97. <span class="c1"># patch for compatibility : label will be removed later</span>
  98. <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">nb_nodes</span><span class="p">):</span>
  99. <span class="n">tmp</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="mi">2</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">)</span>
  100. <span class="n">tmp</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">tmp</span> <span class="k">if</span> <span class="n">x</span> <span class="o">!=</span> <span class="s1">&#39;&#39;</span><span class="p">]</span>
  101. <span class="n">g</span><span class="o">.</span><span class="n">add_node</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">atom</span><span class="o">=</span><span class="n">tmp</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">(),</span>
  102. <span class="n">label</span><span class="o">=</span><span class="p">[</span><span class="n">item</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">tmp</span><span class="p">[</span><span class="mi">3</span><span class="p">:]],</span>
  103. <span class="n">attributes</span><span class="o">=</span><span class="p">[</span><span class="n">item</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">tmp</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="mi">3</span><span class="p">]])</span>
  104. <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">nb_edges</span><span class="p">):</span>
  105. <span class="n">tmp</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="n">g</span><span class="o">.</span><span class="n">number_of_nodes</span><span class="p">()</span> <span class="o">+</span> <span class="mi">2</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">)</span>
  106. <span class="n">tmp</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">tmp</span> <span class="k">if</span> <span class="n">x</span> <span class="o">!=</span> <span class="s1">&#39;&#39;</span><span class="p">]</span>
  107. <span class="n">g</span><span class="o">.</span><span class="n">add_edge</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="nb">int</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span>
  108. <span class="n">bond_type</span><span class="o">=</span><span class="n">tmp</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">(),</span>
  109. <span class="n">label</span><span class="o">=</span><span class="p">[</span><span class="n">item</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">tmp</span><span class="p">[</span><span class="mi">2</span><span class="p">:]])</span>
  110. <span class="k">return</span> <span class="n">g</span></div>
  111. <div class="viewcode-block" id="loadGXL"><a class="viewcode-back" href="../../../pygraph.utils.html#pygraph.utils.graphfiles.loadGXL">[docs]</a><span class="k">def</span> <span class="nf">loadGXL</span><span class="p">(</span><span class="n">filename</span><span class="p">):</span>
  112. <span class="kn">from</span> <span class="nn">os.path</span> <span class="k">import</span> <span class="n">basename</span>
  113. <span class="kn">import</span> <span class="nn">networkx</span> <span class="k">as</span> <span class="nn">nx</span>
  114. <span class="kn">import</span> <span class="nn">xml.etree.ElementTree</span> <span class="k">as</span> <span class="nn">ET</span>
  115. <span class="n">tree</span> <span class="o">=</span> <span class="n">ET</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
  116. <span class="n">root</span> <span class="o">=</span> <span class="n">tree</span><span class="o">.</span><span class="n">getroot</span><span class="p">()</span>
  117. <span class="n">index</span> <span class="o">=</span> <span class="mi">0</span>
  118. <span class="n">g</span> <span class="o">=</span> <span class="n">nx</span><span class="o">.</span><span class="n">Graph</span><span class="p">(</span><span class="n">filename</span><span class="o">=</span><span class="n">basename</span><span class="p">(</span><span class="n">filename</span><span class="p">),</span> <span class="n">name</span><span class="o">=</span><span class="n">root</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">attrib</span><span class="p">[</span><span class="s1">&#39;id&#39;</span><span class="p">])</span>
  119. <span class="n">dic</span> <span class="o">=</span> <span class="p">{}</span> <span class="c1"># used to retrieve incident nodes of edges</span>
  120. <span class="k">for</span> <span class="n">node</span> <span class="ow">in</span> <span class="n">root</span><span class="o">.</span><span class="n">iter</span><span class="p">(</span><span class="s1">&#39;node&#39;</span><span class="p">):</span>
  121. <span class="n">dic</span><span class="p">[</span><span class="n">node</span><span class="o">.</span><span class="n">attrib</span><span class="p">[</span><span class="s1">&#39;id&#39;</span><span class="p">]]</span> <span class="o">=</span> <span class="n">index</span>
  122. <span class="n">labels</span> <span class="o">=</span> <span class="p">{}</span>
  123. <span class="k">for</span> <span class="n">attr</span> <span class="ow">in</span> <span class="n">node</span><span class="o">.</span><span class="n">iter</span><span class="p">(</span><span class="s1">&#39;attr&#39;</span><span class="p">):</span>
  124. <span class="n">labels</span><span class="p">[</span><span class="n">attr</span><span class="o">.</span><span class="n">attrib</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">]]</span> <span class="o">=</span> <span class="n">attr</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">text</span>
  125. <span class="k">if</span> <span class="s1">&#39;chem&#39;</span> <span class="ow">in</span> <span class="n">labels</span><span class="p">:</span>
  126. <span class="n">labels</span><span class="p">[</span><span class="s1">&#39;label&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">labels</span><span class="p">[</span><span class="s1">&#39;chem&#39;</span><span class="p">]</span>
  127. <span class="n">labels</span><span class="p">[</span><span class="s1">&#39;atom&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">labels</span><span class="p">[</span><span class="s1">&#39;chem&#39;</span><span class="p">]</span>
  128. <span class="n">g</span><span class="o">.</span><span class="n">add_node</span><span class="p">(</span><span class="n">index</span><span class="p">,</span> <span class="o">**</span><span class="n">labels</span><span class="p">)</span>
  129. <span class="n">index</span> <span class="o">+=</span> <span class="mi">1</span>
  130. <span class="k">for</span> <span class="n">edge</span> <span class="ow">in</span> <span class="n">root</span><span class="o">.</span><span class="n">iter</span><span class="p">(</span><span class="s1">&#39;edge&#39;</span><span class="p">):</span>
  131. <span class="n">labels</span> <span class="o">=</span> <span class="p">{}</span>
  132. <span class="k">for</span> <span class="n">attr</span> <span class="ow">in</span> <span class="n">edge</span><span class="o">.</span><span class="n">iter</span><span class="p">(</span><span class="s1">&#39;attr&#39;</span><span class="p">):</span>
  133. <span class="n">labels</span><span class="p">[</span><span class="n">attr</span><span class="o">.</span><span class="n">attrib</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">]]</span> <span class="o">=</span> <span class="n">attr</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">text</span>
  134. <span class="k">if</span> <span class="s1">&#39;valence&#39;</span> <span class="ow">in</span> <span class="n">labels</span><span class="p">:</span>
  135. <span class="n">labels</span><span class="p">[</span><span class="s1">&#39;label&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">labels</span><span class="p">[</span><span class="s1">&#39;valence&#39;</span><span class="p">]</span>
  136. <span class="n">labels</span><span class="p">[</span><span class="s1">&#39;bond_type&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">labels</span><span class="p">[</span><span class="s1">&#39;valence&#39;</span><span class="p">]</span>
  137. <span class="n">g</span><span class="o">.</span><span class="n">add_edge</span><span class="p">(</span><span class="n">dic</span><span class="p">[</span><span class="n">edge</span><span class="o">.</span><span class="n">attrib</span><span class="p">[</span><span class="s1">&#39;from&#39;</span><span class="p">]],</span> <span class="n">dic</span><span class="p">[</span><span class="n">edge</span><span class="o">.</span><span class="n">attrib</span><span class="p">[</span><span class="s1">&#39;to&#39;</span><span class="p">]],</span> <span class="o">**</span><span class="n">labels</span><span class="p">)</span>
  138. <span class="k">return</span> <span class="n">g</span></div>
  139. <div class="viewcode-block" id="saveGXL"><a class="viewcode-back" href="../../../pygraph.utils.html#pygraph.utils.graphfiles.saveGXL">[docs]</a><span class="k">def</span> <span class="nf">saveGXL</span><span class="p">(</span><span class="n">graph</span><span class="p">,</span> <span class="n">filename</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">&#39;benoit&#39;</span><span class="p">):</span>
  140. <span class="k">if</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">&#39;benoit&#39;</span><span class="p">:</span>
  141. <span class="kn">import</span> <span class="nn">xml.etree.ElementTree</span> <span class="k">as</span> <span class="nn">ET</span>
  142. <span class="n">root_node</span> <span class="o">=</span> <span class="n">ET</span><span class="o">.</span><span class="n">Element</span><span class="p">(</span><span class="s1">&#39;gxl&#39;</span><span class="p">)</span>
  143. <span class="n">attr</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span>
  144. <span class="n">attr</span><span class="p">[</span><span class="s1">&#39;id&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">graph</span><span class="o">.</span><span class="n">graph</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">])</span>
  145. <span class="n">attr</span><span class="p">[</span><span class="s1">&#39;edgeids&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;true&#39;</span>
  146. <span class="n">attr</span><span class="p">[</span><span class="s1">&#39;edgemode&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;undirected&#39;</span>
  147. <span class="n">graph_node</span> <span class="o">=</span> <span class="n">ET</span><span class="o">.</span><span class="n">SubElement</span><span class="p">(</span><span class="n">root_node</span><span class="p">,</span> <span class="s1">&#39;graph&#39;</span><span class="p">,</span> <span class="n">attrib</span><span class="o">=</span><span class="n">attr</span><span class="p">)</span>
  148. <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">graph</span><span class="p">:</span>
  149. <span class="n">current_node</span> <span class="o">=</span> <span class="n">ET</span><span class="o">.</span><span class="n">SubElement</span><span class="p">(</span><span class="n">graph_node</span><span class="p">,</span> <span class="s1">&#39;node&#39;</span><span class="p">,</span> <span class="n">attrib</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;id&#39;</span><span class="p">:</span> <span class="nb">str</span><span class="p">(</span><span class="n">v</span><span class="p">)})</span>
  150. <span class="k">for</span> <span class="n">attr</span> <span class="ow">in</span> <span class="n">graph</span><span class="o">.</span><span class="n">nodes</span><span class="p">[</span><span class="n">v</span><span class="p">]</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
  151. <span class="n">cur_attr</span> <span class="o">=</span> <span class="n">ET</span><span class="o">.</span><span class="n">SubElement</span><span class="p">(</span>
  152. <span class="n">current_node</span><span class="p">,</span> <span class="s1">&#39;attr&#39;</span><span class="p">,</span> <span class="n">attrib</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="n">attr</span><span class="p">})</span>
  153. <span class="n">cur_value</span> <span class="o">=</span> <span class="n">ET</span><span class="o">.</span><span class="n">SubElement</span><span class="p">(</span><span class="n">cur_attr</span><span class="p">,</span>
  154. <span class="n">graph</span><span class="o">.</span><span class="n">nodes</span><span class="p">[</span><span class="n">v</span><span class="p">][</span><span class="n">attr</span><span class="p">]</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
  155. <span class="n">cur_value</span><span class="o">.</span><span class="n">text</span> <span class="o">=</span> <span class="n">graph</span><span class="o">.</span><span class="n">nodes</span><span class="p">[</span><span class="n">v</span><span class="p">][</span><span class="n">attr</span><span class="p">]</span>
  156. <span class="k">for</span> <span class="n">v1</span> <span class="ow">in</span> <span class="n">graph</span><span class="p">:</span>
  157. <span class="k">for</span> <span class="n">v2</span> <span class="ow">in</span> <span class="n">graph</span><span class="p">[</span><span class="n">v1</span><span class="p">]:</span>
  158. <span class="k">if</span> <span class="p">(</span><span class="n">v1</span> <span class="o">&lt;</span> <span class="n">v2</span><span class="p">):</span> <span class="c1"># Non oriented graphs</span>
  159. <span class="n">cur_edge</span> <span class="o">=</span> <span class="n">ET</span><span class="o">.</span><span class="n">SubElement</span><span class="p">(</span>
  160. <span class="n">graph_node</span><span class="p">,</span>
  161. <span class="s1">&#39;edge&#39;</span><span class="p">,</span>
  162. <span class="n">attrib</span><span class="o">=</span><span class="p">{</span>
  163. <span class="s1">&#39;from&#39;</span><span class="p">:</span> <span class="nb">str</span><span class="p">(</span><span class="n">v1</span><span class="p">),</span>
  164. <span class="s1">&#39;to&#39;</span><span class="p">:</span> <span class="nb">str</span><span class="p">(</span><span class="n">v2</span><span class="p">)</span>
  165. <span class="p">})</span>
  166. <span class="k">for</span> <span class="n">attr</span> <span class="ow">in</span> <span class="n">graph</span><span class="p">[</span><span class="n">v1</span><span class="p">][</span><span class="n">v2</span><span class="p">]</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
  167. <span class="n">cur_attr</span> <span class="o">=</span> <span class="n">ET</span><span class="o">.</span><span class="n">SubElement</span><span class="p">(</span>
  168. <span class="n">cur_edge</span><span class="p">,</span> <span class="s1">&#39;attr&#39;</span><span class="p">,</span> <span class="n">attrib</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="n">attr</span><span class="p">})</span>
  169. <span class="n">cur_value</span> <span class="o">=</span> <span class="n">ET</span><span class="o">.</span><span class="n">SubElement</span><span class="p">(</span>
  170. <span class="n">cur_attr</span><span class="p">,</span> <span class="n">graph</span><span class="p">[</span><span class="n">v1</span><span class="p">][</span><span class="n">v2</span><span class="p">][</span><span class="n">attr</span><span class="p">]</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
  171. <span class="n">cur_value</span><span class="o">.</span><span class="n">text</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">graph</span><span class="p">[</span><span class="n">v1</span><span class="p">][</span><span class="n">v2</span><span class="p">][</span><span class="n">attr</span><span class="p">])</span>
  172. <span class="n">tree</span> <span class="o">=</span> <span class="n">ET</span><span class="o">.</span><span class="n">ElementTree</span><span class="p">(</span><span class="n">root_node</span><span class="p">)</span>
  173. <span class="n">tree</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
  174. <span class="k">elif</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">&#39;gedlib&#39;</span><span class="p">:</span>
  175. <span class="c1"># reference: https://github.com/dbblumenthal/gedlib/blob/master/data/generate_molecules.py#L22</span>
  176. <span class="c1"># pass</span>
  177. <span class="n">gxl_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="s1">&#39;w&#39;</span><span class="p">)</span>
  178. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;?xml version=</span><span class="se">\&quot;</span><span class="s2">1.0</span><span class="se">\&quot;</span><span class="s2"> encoding=</span><span class="se">\&quot;</span><span class="s2">UTF-8</span><span class="se">\&quot;</span><span class="s2">?&gt;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
  179. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;!DOCTYPE gxl SYSTEM </span><span class="se">\&quot;</span><span class="s2">http://www.gupro.de/GXL/gxl-1.0.dtd</span><span class="se">\&quot;</span><span class="s2">&gt;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
  180. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;gxl xmlns:xlink=</span><span class="se">\&quot;</span><span class="s2">http://www.w3.org/1999/xlink</span><span class="se">\&quot;</span><span class="s2">&gt;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
  181. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;graph id=</span><span class="se">\&quot;</span><span class="s2">&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">graph</span><span class="o">.</span><span class="n">graph</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">])</span> <span class="o">+</span> <span class="s2">&quot;</span><span class="se">\&quot;</span><span class="s2"> edgeids=</span><span class="se">\&quot;</span><span class="s2">true</span><span class="se">\&quot;</span><span class="s2"> edgemode=</span><span class="se">\&quot;</span><span class="s2">undirected</span><span class="se">\&quot;</span><span class="s2">&gt;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
  182. <span class="k">for</span> <span class="n">v</span><span class="p">,</span> <span class="n">attrs</span> <span class="ow">in</span> <span class="n">graph</span><span class="o">.</span><span class="n">nodes</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
  183. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;node id=</span><span class="se">\&quot;</span><span class="s2">_&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">v</span><span class="p">)</span> <span class="o">+</span> <span class="s2">&quot;</span><span class="se">\&quot;</span><span class="s2">&gt;&quot;</span><span class="p">)</span>
  184. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;attr name=</span><span class="se">\&quot;</span><span class="s2">&quot;</span> <span class="o">+</span> <span class="s2">&quot;chem&quot;</span> <span class="o">+</span> <span class="s2">&quot;</span><span class="se">\&quot;</span><span class="s2">&gt;&lt;int&gt;&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;chem&#39;</span><span class="p">])</span> <span class="o">+</span> <span class="s2">&quot;&lt;/int&gt;&lt;/attr&gt;&quot;</span><span class="p">)</span>
  185. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;/node&gt;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
  186. <span class="k">for</span> <span class="n">v1</span><span class="p">,</span> <span class="n">v2</span><span class="p">,</span> <span class="n">attrs</span> <span class="ow">in</span> <span class="n">graph</span><span class="o">.</span><span class="n">edges</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
  187. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;edge from=</span><span class="se">\&quot;</span><span class="s2">_&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">v1</span><span class="p">)</span> <span class="o">+</span> <span class="s2">&quot;</span><span class="se">\&quot;</span><span class="s2"> to=</span><span class="se">\&quot;</span><span class="s2">_&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">v2</span><span class="p">)</span> <span class="o">+</span> <span class="s2">&quot;</span><span class="se">\&quot;</span><span class="s2">&gt;&quot;</span><span class="p">)</span>
  188. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;attr name=</span><span class="se">\&quot;</span><span class="s2">valence</span><span class="se">\&quot;</span><span class="s2">&gt;&lt;int&gt;&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;valence&#39;</span><span class="p">])</span> <span class="o">+</span> <span class="s2">&quot;&lt;/int&gt;&lt;/attr&gt;&quot;</span><span class="p">)</span>
  189. <span class="c1"># gxl_file.write(&quot;&lt;attr name=\&quot;valence\&quot;&gt;&lt;int&gt;&quot; + &quot;1&quot; + &quot;&lt;/int&gt;&lt;/attr&gt;&quot;)</span>
  190. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;/edge&gt;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
  191. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;/graph&gt;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
  192. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;/gxl&gt;&quot;</span><span class="p">)</span>
  193. <span class="n">gxl_file</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
  194. <span class="k">elif</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">&#39;gedlib-letter&#39;</span><span class="p">:</span>
  195. <span class="c1"># reference: https://github.com/dbblumenthal/gedlib/blob/master/data/generate_molecules.py#L22</span>
  196. <span class="c1"># and https://github.com/dbblumenthal/gedlib/blob/master/data/datasets/Letter/HIGH/AP1_0000.gxl</span>
  197. <span class="n">gxl_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="s1">&#39;w&#39;</span><span class="p">)</span>
  198. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;?xml version=</span><span class="se">\&quot;</span><span class="s2">1.0</span><span class="se">\&quot;</span><span class="s2"> encoding=</span><span class="se">\&quot;</span><span class="s2">UTF-8</span><span class="se">\&quot;</span><span class="s2">?&gt;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
  199. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;!DOCTYPE gxl SYSTEM </span><span class="se">\&quot;</span><span class="s2">http://www.gupro.de/GXL/gxl-1.0.dtd</span><span class="se">\&quot;</span><span class="s2">&gt;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
  200. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;gxl xmlns:xlink=</span><span class="se">\&quot;</span><span class="s2">http://www.w3.org/1999/xlink</span><span class="se">\&quot;</span><span class="s2">&gt;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
  201. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;graph id=</span><span class="se">\&quot;</span><span class="s2">&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">graph</span><span class="o">.</span><span class="n">graph</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">])</span> <span class="o">+</span> <span class="s2">&quot;</span><span class="se">\&quot;</span><span class="s2"> edgeids=</span><span class="se">\&quot;</span><span class="s2">false</span><span class="se">\&quot;</span><span class="s2"> edgemode=</span><span class="se">\&quot;</span><span class="s2">undirected</span><span class="se">\&quot;</span><span class="s2">&gt;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
  202. <span class="k">for</span> <span class="n">v</span><span class="p">,</span> <span class="n">attrs</span> <span class="ow">in</span> <span class="n">graph</span><span class="o">.</span><span class="n">nodes</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
  203. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;node id=</span><span class="se">\&quot;</span><span class="s2">_&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">v</span><span class="p">)</span> <span class="o">+</span> <span class="s2">&quot;</span><span class="se">\&quot;</span><span class="s2">&gt;&quot;</span><span class="p">)</span>
  204. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;attr name=</span><span class="se">\&quot;</span><span class="s2">x</span><span class="se">\&quot;</span><span class="s2">&gt;&lt;float&gt;&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">][</span><span class="mi">0</span><span class="p">])</span> <span class="o">+</span> <span class="s2">&quot;&lt;/float&gt;&lt;/attr&gt;&quot;</span><span class="p">)</span>
  205. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;attr name=</span><span class="se">\&quot;</span><span class="s2">y</span><span class="se">\&quot;</span><span class="s2">&gt;&lt;float&gt;&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">][</span><span class="mi">1</span><span class="p">])</span> <span class="o">+</span> <span class="s2">&quot;&lt;/float&gt;&lt;/attr&gt;&quot;</span><span class="p">)</span>
  206. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;/node&gt;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
  207. <span class="k">for</span> <span class="n">v1</span><span class="p">,</span> <span class="n">v2</span><span class="p">,</span> <span class="n">attrs</span> <span class="ow">in</span> <span class="n">graph</span><span class="o">.</span><span class="n">edges</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
  208. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;edge from=</span><span class="se">\&quot;</span><span class="s2">_&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">v1</span><span class="p">)</span> <span class="o">+</span> <span class="s2">&quot;</span><span class="se">\&quot;</span><span class="s2"> to=</span><span class="se">\&quot;</span><span class="s2">_&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">v2</span><span class="p">)</span> <span class="o">+</span> <span class="s2">&quot;</span><span class="se">\&quot;</span><span class="s2">/&gt;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
  209. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;/graph&gt;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
  210. <span class="n">gxl_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;/gxl&gt;&quot;</span><span class="p">)</span>
  211. <span class="n">gxl_file</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div>
  212. <!-- Highlighted listing of the Python function loadSDF(filename); the [docs] anchor links back to its entry in pygraph.utils.html. Per record, the listed code takes the graph name from the first line, the node and edge counts from the first two 3-character fields of the fourth line, one node per following line, one edge per line after that, and then advances to the line after the $$$$ separator. NOTE(review): the markup looks generated (viewcode and Pygments-style class names, TODO confirm), so code fixes belong in the Python source rather than in this page. --><div class="viewcode-block" id="loadSDF"><a class="viewcode-back" href="../../../pygraph.utils.html#pygraph.utils.graphfiles.loadSDF">[docs]</a><span class="k">def</span> <span class="nf">loadSDF</span><span class="p">(</span><span class="n">filename</span><span class="p">):</span>
  213. <span class="sd">&quot;&quot;&quot;load data from structured data file (.sdf file).</span>
  214. <span class="sd"> Notes</span>
  215. <span class="sd"> ------</span>
  216. <span class="sd"> A SDF file contains a group of molecules, represented in the similar way as in MOL format.</span>
  217. <span class="sd"> Check http://www.nonlinear.com/progenesis/sdf-studio/v0.9/faq/sdf-file-format-guidance.aspx, 2018 for detailed structure.</span>
  218. <span class="sd"> &quot;&quot;&quot;</span>
  219. <span class="kn">import</span> <span class="nn">networkx</span> <span class="k">as</span> <span class="nn">nx</span>
  220. <span class="kn">from</span> <span class="nn">os.path</span> <span class="k">import</span> <span class="n">basename</span><!-- NOTE(review): basename is not referenced anywhere else in this listing -->
  221. <span class="kn">from</span> <span class="nn">tqdm</span> <span class="k">import</span> <span class="n">tqdm</span>
  222. <span class="kn">import</span> <span class="nn">sys</span>
  223. <span class="n">data</span> <span class="o">=</span> <span class="p">[]</span>
  224. <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
  225. <span class="n">content</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">splitlines</span><span class="p">()</span>
  226. <span class="n">index</span> <span class="o">=</span> <span class="mi">0</span>
  227. <span class="n">pbar</span> <span class="o">=</span> <span class="n">tqdm</span><span class="p">(</span><span class="n">total</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="n">content</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="s1">&#39;load SDF&#39;</span><span class="p">,</span> <span class="n">file</span><span class="o">=</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">)</span>
  228. <span class="k">while</span> <span class="n">index</span> <span class="o">&lt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">content</span><span class="p">):</span>
  229. <span class="n">index_old</span> <span class="o">=</span> <span class="n">index</span>
  230. <span class="n">g</span> <span class="o">=</span> <span class="n">nx</span><span class="o">.</span><span class="n">Graph</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">content</span><span class="p">[</span><span class="n">index</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span> <span class="c1"># set name of the graph</span>
  231. <span class="n">tmp</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">index</span> <span class="o">+</span> <span class="mi">3</span><span class="p">]</span>
  232. <span class="n">nb_nodes</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">tmp</span><span class="p">[:</span><span class="mi">3</span><span class="p">])</span> <span class="c1"># number of the nodes</span>
  233. <span class="n">nb_edges</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">3</span><span class="p">:</span><span class="mi">6</span><span class="p">])</span> <span class="c1"># number of the edges</span>
  234. <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">nb_nodes</span><span class="p">):</span>
  235. <span class="n">tmp</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="n">index</span> <span class="o">+</span> <span class="mi">4</span><span class="p">]</span>
  236. <span class="n">g</span><span class="o">.</span><span class="n">add_node</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">atom</span><span class="o">=</span><span class="n">tmp</span><span class="p">[</span><span class="mi">31</span><span class="p">:</span><span class="mi">34</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span>
  237. <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">nb_edges</span><span class="p">):</span>
  238. <span class="n">tmp</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="n">index</span> <span class="o">+</span> <span class="n">g</span><span class="o">.</span><span class="n">number_of_nodes</span><span class="p">()</span> <span class="o">+</span> <span class="mi">4</span><span class="p">]</span>
  239. <span class="n">tmp</span> <span class="o">=</span> <span class="p">[</span><span class="n">tmp</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span> <span class="o">+</span> <span class="mi">3</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">tmp</span><span class="p">),</span> <span class="mi">3</span><span class="p">)]</span>
  240. <span class="n">g</span><span class="o">.</span><span class="n">add_edge</span><span class="p">(</span>
  241. <span class="nb">int</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="nb">int</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">bond_type</span><span class="o">=</span><span class="n">tmp</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span>
  242. <span class="n">data</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">g</span><span class="p">)</span>
  243. <span class="n">index</span> <span class="o">+=</span> <span class="mi">4</span> <span class="o">+</span> <span class="n">g</span><span class="o">.</span><span class="n">number_of_nodes</span><span class="p">()</span> <span class="o">+</span> <span class="n">g</span><span class="o">.</span><span class="n">number_of_edges</span><span class="p">()</span>
  244. <span class="k">while</span> <span class="n">content</span><span class="p">[</span><span class="n">index</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> <span class="o">!=</span> <span class="s1">&#39;$$$$&#39;</span><span class="p">:</span> <span class="c1"># seperator</span><!-- NOTE(review): index is not bounds-checked in this inner loop; a final record with no $$$$ line would index past the end of content -->
  245. <span class="n">index</span> <span class="o">+=</span> <span class="mi">1</span>
  246. <span class="n">index</span> <span class="o">+=</span> <span class="mi">1</span>
  247. <span class="n">pbar</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">index</span> <span class="o">-</span> <span class="n">index_old</span><span class="p">)</span>
  248. <span class="n">pbar</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
  249. <span class="n">pbar</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
  250. <span class="k">return</span> <span class="n">data</span></div>
  251. <!-- Highlighted listing of the Python function loadMAT(filename, extra_params); the [docs] anchor links back to its entry in pygraph.utils.html. The listed code calls scipy.io.loadmat on filename, reads extra_params['am_sp_al_nl_el'] into order, builds one networkx Graph per item of value[0], and returns the pair (data, y). NOTE(review): the markup looks generated (viewcode and Pygments-style class names, TODO confirm), so code fixes belong in the Python source rather than in this page. --><div class="viewcode-block" id="loadMAT"><a class="viewcode-back" href="../../../pygraph.utils.html#pygraph.utils.graphfiles.loadMAT">[docs]</a><span class="k">def</span> <span class="nf">loadMAT</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="n">extra_params</span><span class="p">):</span>
  252. <span class="sd">&quot;&quot;&quot;Load graph data from a MATLAB (up to version 7.1) .mat file.</span>
  253. <span class="sd"> Notes</span>
  254. <span class="sd"> ------</span>
  255. <span class="sd"> A MAT file contains a struct array containing graphs, and a column vector lx containing a class label for each graph.</span>
  256. <span class="sd"> Check README in downloadable file in http://mlcb.is.tuebingen.mpg.de/Mitarbeiter/Nino/WL/, 2018 for detailed structure.</span>
  257. <span class="sd"> &quot;&quot;&quot;</span>
  258. <span class="kn">from</span> <span class="nn">scipy.io</span> <span class="k">import</span> <span class="n">loadmat</span>
  259. <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
  260. <span class="kn">import</span> <span class="nn">networkx</span> <span class="k">as</span> <span class="nn">nx</span>
  261. <span class="n">data</span> <span class="o">=</span> <span class="p">[]</span>
  262. <span class="n">content</span> <span class="o">=</span> <span class="n">loadmat</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
  263. <span class="n">order</span> <span class="o">=</span> <span class="n">extra_params</span><span class="p">[</span><span class="s1">&#39;am_sp_al_nl_el&#39;</span><span class="p">]</span>
  264. <span class="c1"># print(content)</span>
  265. <span class="c1"># print(&#39;----&#39;)</span>
  266. <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">content</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
  267. <span class="k">if</span> <span class="n">key</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s1">&#39;l&#39;</span><span class="p">:</span> <span class="c1"># class label</span><!-- NOTE(review): y is assigned only inside this branch; if no key starts with 'l' the final return would fail on an unbound y -->
  268. <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="n">value</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span>
  269. <span class="c1"># print(y)</span>
  270. <span class="k">elif</span> <span class="n">key</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">!=</span> <span class="s1">&#39;_&#39;</span><span class="p">:</span>
  271. <span class="c1"># print(value[0][0][0])</span>
  272. <span class="c1"># print()</span>
  273. <span class="c1"># print(value[0][0][1])</span>
  274. <span class="c1"># print()</span>
  275. <span class="c1"># print(value[0][0][2])</span>
  276. <span class="c1"># print()</span>
  277. <span class="c1"># if len(value[0][0]) &gt; 3:</span>
  278. <span class="c1"># print(value[0][0][3])</span>
  279. <span class="c1"># print(&#39;----&#39;)</span>
  280. <span class="c1"># if adjacency matrix is not compressed / edge label exists</span>
  281. <span class="k">if</span> <span class="n">order</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
  282. <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">item</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">value</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span>
  283. <span class="c1"># print(item)</span>
  284. <span class="c1"># print(&#39;------&#39;)</span>
  285. <span class="n">g</span> <span class="o">=</span> <span class="n">nx</span><span class="o">.</span><span class="n">Graph</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">i</span><span class="p">)</span> <span class="c1"># set name of the graph</span>
  286. <span class="n">nl</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="n">order</span><span class="p">[</span><span class="mi">3</span><span class="p">]][</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">])</span> <span class="c1"># node label</span>
  287. <span class="c1"># print(item[order[3]])</span>
  288. <span class="c1"># print()</span>
  289. <span class="k">for</span> <span class="n">index</span><span class="p">,</span> <span class="n">label</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">nl</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span>
  290. <span class="n">g</span><span class="o">.</span><span class="n">add_node</span><span class="p">(</span><span class="n">index</span><span class="p">,</span> <span class="n">atom</span><span class="o">=</span><span class="nb">str</span><span class="p">(</span><span class="n">label</span><span class="p">))</span>
  291. <span class="n">el</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="n">order</span><span class="p">[</span><span class="mi">4</span><span class="p">]][</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> <span class="c1"># edge label</span>
  292. <span class="k">for</span> <span class="n">edge</span> <span class="ow">in</span> <span class="n">el</span><span class="p">:</span>
  293. <span class="n">g</span><span class="o">.</span><span class="n">add_edge</span><span class="p">(</span>
  294. <span class="n">edge</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">edge</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">bond_type</span><span class="o">=</span><span class="nb">str</span><span class="p">(</span><span class="n">edge</span><span class="p">[</span><span class="mi">2</span><span class="p">]))</span>
  295. <span class="n">data</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">g</span><span class="p">)</span>
  296. <span class="k">else</span><span class="p">:</span>
  297. <span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="k">import</span> <span class="n">csc_matrix</span><!-- NOTE(review): csc_matrix is not referenced by name anywhere else in this listing -->
  298. <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">item</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">value</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span>
  299. <span class="c1"># print(item)</span>
  300. <span class="c1"># print(&#39;------&#39;)</span>
  301. <span class="n">g</span> <span class="o">=</span> <span class="n">nx</span><span class="o">.</span><span class="n">Graph</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">i</span><span class="p">)</span> <span class="c1"># set name of the graph</span>
  302. <span class="n">nl</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="n">order</span><span class="p">[</span><span class="mi">3</span><span class="p">]][</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">])</span> <span class="c1"># node label</span>
  303. <span class="c1"># print(nl)</span>
  304. <span class="c1"># print()</span>
  305. <span class="k">for</span> <span class="n">index</span><span class="p">,</span> <span class="n">label</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">nl</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span>
  306. <span class="n">g</span><span class="o">.</span><span class="n">add_node</span><span class="p">(</span><span class="n">index</span><span class="p">,</span> <span class="n">atom</span><span class="o">=</span><span class="nb">str</span><span class="p">(</span><span class="n">label</span><span class="p">))</span>
  307. <span class="n">sam</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="n">order</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span> <span class="c1"># sparse adjacency matrix</span>
  308. <span class="n">index_no0</span> <span class="o">=</span> <span class="n">sam</span><span class="o">.</span><span class="n">nonzero</span><span class="p">()</span>
  309. <span class="k">for</span> <span class="n">col</span><span class="p">,</span> <span class="n">row</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">index_no0</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">index_no0</span><span class="p">[</span><span class="mi">1</span><span class="p">]):</span>
  310. <span class="c1"># print(col)</span>
  311. <span class="c1"># print(row)</span>
  312. <span class="n">g</span><span class="o">.</span><span class="n">add_edge</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">row</span><span class="p">)</span><!-- NOTE(review): unlike the order[1] == 0 branch, edges added here carry no bond_type attribute -->
  313. <span class="n">data</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">g</span><span class="p">)</span>
  314. <span class="c1"># print(g.edges(data=True))</span>
  315. <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">y</span></div>
  316. <div class="viewcode-block" id="loadTXT"><a class="viewcode-back" href="../../../pygraph.utils.html#pygraph.utils.graphfiles.loadTXT">[docs]</a><span class="k">def</span> <span class="nf">loadTXT</span><span class="p">(</span><span class="n">dirname_dataset</span><span class="p">):</span>
  317. <span class="sd">&quot;&quot;&quot;Load graph data from a .txt file.</span>
  318. <span class="sd"> Notes</span>
  319. <span class="sd"> ------</span>
  320. <span class="sd"> The graph data is loaded from separate files.</span>
  321. <span class="sd"> Check README in downloadable file http://tiny.cc/PK_MLJ_data, 2018 for detailed structure.</span>
  322. <span class="sd"> &quot;&quot;&quot;</span>
  323. <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
  324. <span class="kn">import</span> <span class="nn">networkx</span> <span class="k">as</span> <span class="nn">nx</span>
  325. <span class="kn">from</span> <span class="nn">os</span> <span class="k">import</span> <span class="n">listdir</span>
  326. <span class="kn">from</span> <span class="nn">os.path</span> <span class="k">import</span> <span class="n">dirname</span>
  327. <span class="c1"># load data file names</span>
  328. <span class="k">for</span> <span class="n">name</span> <span class="ow">in</span> <span class="n">listdir</span><span class="p">(</span><span class="n">dirname_dataset</span><span class="p">):</span>
  329. <span class="k">if</span> <span class="s1">&#39;_A&#39;</span> <span class="ow">in</span> <span class="n">name</span><span class="p">:</span>
  330. <span class="n">fam</span> <span class="o">=</span> <span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">name</span>
  331. <span class="k">elif</span> <span class="s1">&#39;_graph_indicator&#39;</span> <span class="ow">in</span> <span class="n">name</span><span class="p">:</span>
  332. <span class="n">fgi</span> <span class="o">=</span> <span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">name</span>
  333. <span class="k">elif</span> <span class="s1">&#39;_graph_labels&#39;</span> <span class="ow">in</span> <span class="n">name</span><span class="p">:</span>
  334. <span class="n">fgl</span> <span class="o">=</span> <span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">name</span>
  335. <span class="k">elif</span> <span class="s1">&#39;_node_labels&#39;</span> <span class="ow">in</span> <span class="n">name</span><span class="p">:</span>
  336. <span class="n">fnl</span> <span class="o">=</span> <span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">name</span>
  337. <span class="k">elif</span> <span class="s1">&#39;_edge_labels&#39;</span> <span class="ow">in</span> <span class="n">name</span><span class="p">:</span>
  338. <span class="n">fel</span> <span class="o">=</span> <span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">name</span>
  339. <span class="k">elif</span> <span class="s1">&#39;_edge_attributes&#39;</span> <span class="ow">in</span> <span class="n">name</span><span class="p">:</span>
  340. <span class="n">fea</span> <span class="o">=</span> <span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">name</span>
  341. <span class="k">elif</span> <span class="s1">&#39;_node_attributes&#39;</span> <span class="ow">in</span> <span class="n">name</span><span class="p">:</span>
  342. <span class="n">fna</span> <span class="o">=</span> <span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">name</span>
  343. <span class="k">elif</span> <span class="s1">&#39;_graph_attributes&#39;</span> <span class="ow">in</span> <span class="n">name</span><span class="p">:</span>
  344. <span class="n">fga</span> <span class="o">=</span> <span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">name</span>
  345. <span class="c1"># this is supposed to be the node attrs, make sure to put this as the last &#39;elif&#39;</span>
  346. <span class="k">elif</span> <span class="s1">&#39;_attributes&#39;</span> <span class="ow">in</span> <span class="n">name</span><span class="p">:</span>
  347. <span class="n">fna</span> <span class="o">=</span> <span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">name</span>
  348. <span class="n">content_gi</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">fgi</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">splitlines</span><span class="p">()</span> <span class="c1"># graph indicator</span>
  349. <span class="n">content_am</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">fam</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">splitlines</span><span class="p">()</span> <span class="c1"># adjacency matrix</span>
  350. <span class="n">content_gl</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">fgl</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">splitlines</span><span class="p">()</span> <span class="c1"># class labels</span>
  351. <span class="c1"># create graphs and add nodes</span>
  352. <span class="n">data</span> <span class="o">=</span> <span class="p">[</span><span class="n">nx</span><span class="o">.</span><span class="n">Graph</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">content_gl</span><span class="p">))]</span>
  353. <span class="k">if</span> <span class="s1">&#39;fnl&#39;</span> <span class="ow">in</span> <span class="nb">locals</span><span class="p">():</span>
  354. <span class="n">content_nl</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">fnl</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">splitlines</span><span class="p">()</span> <span class="c1"># node labels</span>
  355. <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">content_gi</span><span class="p">):</span>
  356. <span class="c1"># convert to int first in case of unexpected blanks</span>
  357. <span class="n">data</span><span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">line</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">add_node</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">atom</span><span class="o">=</span><span class="nb">str</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">content_nl</span><span class="p">[</span><span class="n">i</span><span class="p">])))</span>
  358. <span class="k">else</span><span class="p">:</span>
  359. <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">content_gi</span><span class="p">):</span>
  360. <span class="n">data</span><span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">line</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">add_node</span><span class="p">(</span><span class="n">i</span><span class="p">)</span>
  361. <span class="c1"># add edges</span>
  362. <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">content_am</span><span class="p">:</span>
  363. <span class="n">tmp</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;,&#39;</span><span class="p">)</span>
  364. <span class="n">n1</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">-</span> <span class="mi">1</span>
  365. <span class="n">n2</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> <span class="o">-</span> <span class="mi">1</span>
  366. <span class="c1"># ignore edge weight here.</span>
  367. <span class="n">g</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">content_gi</span><span class="p">[</span><span class="n">n1</span><span class="p">])</span> <span class="o">-</span> <span class="mi">1</span>
  368. <span class="n">data</span><span class="p">[</span><span class="n">g</span><span class="p">]</span><span class="o">.</span><span class="n">add_edge</span><span class="p">(</span><span class="n">n1</span><span class="p">,</span> <span class="n">n2</span><span class="p">)</span>
  369. <span class="c1"># add edge labels</span>
  370. <span class="k">if</span> <span class="s1">&#39;fel&#39;</span> <span class="ow">in</span> <span class="nb">locals</span><span class="p">():</span>
  371. <span class="n">content_el</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">fel</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">splitlines</span><span class="p">()</span>
  372. <span class="k">for</span> <span class="n">index</span><span class="p">,</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">content_el</span><span class="p">):</span>
  373. <span class="n">label</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
  374. <span class="n">n</span> <span class="o">=</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">content_am</span><span class="p">[</span><span class="n">index</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;,&#39;</span><span class="p">)]</span>
  375. <span class="n">g</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">content_gi</span><span class="p">[</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]])</span> <span class="o">-</span> <span class="mi">1</span>
  376. <span class="n">data</span><span class="p">[</span><span class="n">g</span><span class="p">]</span><span class="o">.</span><span class="n">edges</span><span class="p">[</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">n</span><span class="p">[</span><span class="mi">1</span><span class="p">]][</span><span class="s1">&#39;bond_type&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">label</span>
  377. <span class="c1"># add node attributes</span>
  378. <span class="k">if</span> <span class="s1">&#39;fna&#39;</span> <span class="ow">in</span> <span class="nb">locals</span><span class="p">():</span>
  379. <span class="n">content_na</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">fna</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">splitlines</span><span class="p">()</span>
  380. <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">content_na</span><span class="p">):</span>
  381. <span class="n">attrs</span> <span class="o">=</span> <span class="p">[</span><span class="n">i</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">line</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;,&#39;</span><span class="p">)]</span>
  382. <span class="n">g</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">content_gi</span><span class="p">[</span><span class="n">i</span><span class="p">])</span> <span class="o">-</span> <span class="mi">1</span>
  383. <span class="n">data</span><span class="p">[</span><span class="n">g</span><span class="p">]</span><span class="o">.</span><span class="n">nodes</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="s1">&#39;attributes&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">attrs</span>
  384. <span class="c1"># add edge attributes</span>
  385. <span class="k">if</span> <span class="s1">&#39;fea&#39;</span> <span class="ow">in</span> <span class="nb">locals</span><span class="p">():</span>
  386. <span class="n">content_ea</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">fea</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">splitlines</span><span class="p">()</span>
  387. <span class="k">for</span> <span class="n">index</span><span class="p">,</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">content_ea</span><span class="p">):</span>
  388. <span class="n">attrs</span> <span class="o">=</span> <span class="p">[</span><span class="n">i</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">line</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;,&#39;</span><span class="p">)]</span>
  389. <span class="n">n</span> <span class="o">=</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">content_am</span><span class="p">[</span><span class="n">index</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;,&#39;</span><span class="p">)]</span>
  390. <span class="n">g</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">content_gi</span><span class="p">[</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">]])</span> <span class="o">-</span> <span class="mi">1</span>
  391. <span class="n">data</span><span class="p">[</span><span class="n">g</span><span class="p">]</span><span class="o">.</span><span class="n">edges</span><span class="p">[</span><span class="n">n</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">n</span><span class="p">[</span><span class="mi">1</span><span class="p">]][</span><span class="s1">&#39;attributes&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">attrs</span>
  392. <span class="c1"># load y</span>
  393. <span class="n">y</span> <span class="o">=</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">content_gl</span><span class="p">]</span>
  394. <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">y</span></div>
  395. <div class="viewcode-block" id="loadDataset"><a class="viewcode-back" href="../../../pygraph.utils.html#pygraph.utils.graphfiles.loadDataset">[docs]</a><span class="k">def</span> <span class="nf">loadDataset</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="n">filename_y</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">extra_params</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
  396. <span class="sd">&quot;&quot;&quot;Read graph data from filename and load them as NetworkX graphs.</span>
  397. <span class="sd"> Parameters</span>
  398. <span class="sd"> ----------</span>
  399. <span class="sd"> filename : string</span>
  400. <span class="sd"> The name of the file from where the dataset is read.</span>
  401. <span class="sd"> filename_y : string</span>
  402. <span class="sd"> The name of file of the targets corresponding to graphs.</span>
  403. <span class="sd"> extra_params : dict</span>
  404. <span class="sd"> Extra parameters only designated to &#39;.mat&#39; format.</span>
  405. <span class="sd"> Returns</span>
  406. <span class="sd"> -------</span>
  407. <span class="sd"> data : List of NetworkX graphs.</span>
  408. <span class="sd"> y : List</span>
  409. <span class="sd"> Targets corresponding to graphs.</span>
  410. <span class="sd"> </span>
  411. <span class="sd"> Notes</span>
  412. <span class="sd"> -----</span>
  413. <span class="sd"> This function supports following graph dataset formats:</span>
  414. <span class="sd"> &#39;ds&#39;: load data from .ds file. See comments of function loadFromDS for an example.</span>
  415. <span class="sd"> &#39;cxl&#39;: load data from Graph eXchange Language file (.cxl file). See </span>
  416. <span class="sd"> http://www.gupro.de/GXL/Introduction/background.html, 2019 for detail.</span>
  417. <span class="sd"> &#39;sdf&#39;: load data from structured data file (.sdf file). See </span>
  418. <span class="sd"> http://www.nonlinear.com/progenesis/sdf-studio/v0.9/faq/sdf-file-format-guidance.aspx, </span>
  419. <span class="sd"> 2018 for details.</span>
  420. <span class="sd"> &#39;mat&#39;: Load graph data from a MATLAB (up to version 7.1) .mat file. See</span>
  421. <span class="sd"> README in downloadable file in http://mlcb.is.tuebingen.mpg.de/Mitarbeiter/Nino/WL/, </span>
  422. <span class="sd"> 2018 for details.</span>
  423. <span class="sd"> &#39;txt&#39;: Load graph data from a special .txt file. See</span>
  424. <span class="sd"> https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets,</span>
  425. <span class="sd"> 2019 for details. Note that here filename is the name of any .txt file in</span>
  426. <span class="sd"> the dataset directory.</span>
  427. <span class="sd"> &quot;&quot;&quot;</span>
  428. <span class="n">extension</span> <span class="o">=</span> <span class="n">splitext</span><span class="p">(</span><span class="n">filename</span><span class="p">)[</span><span class="mi">1</span><span class="p">][</span><span class="mi">1</span><span class="p">:]</span>
  429. <span class="k">if</span> <span class="n">extension</span> <span class="o">==</span> <span class="s2">&quot;ds&quot;</span><span class="p">:</span>
  430. <span class="n">data</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">loadFromDS</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="n">filename_y</span><span class="p">)</span>
  431. <span class="k">elif</span> <span class="n">extension</span> <span class="o">==</span> <span class="s2">&quot;cxl&quot;</span><span class="p">:</span>
  432. <span class="kn">import</span> <span class="nn">xml.etree.ElementTree</span> <span class="k">as</span> <span class="nn">ET</span>
  433. <span class="n">dirname_dataset</span> <span class="o">=</span> <span class="n">dirname</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
  434. <span class="n">tree</span> <span class="o">=</span> <span class="n">ET</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
  435. <span class="n">root</span> <span class="o">=</span> <span class="n">tree</span><span class="o">.</span><span class="n">getroot</span><span class="p">()</span>
  436. <span class="n">data</span> <span class="o">=</span> <span class="p">[]</span>
  437. <span class="n">y</span> <span class="o">=</span> <span class="p">[]</span>
  438. <span class="k">for</span> <span class="n">graph</span> <span class="ow">in</span> <span class="n">root</span><span class="o">.</span><span class="n">iter</span><span class="p">(</span><span class="s1">&#39;graph&#39;</span><span class="p">):</span>
  439. <span class="n">mol_filename</span> <span class="o">=</span> <span class="n">graph</span><span class="o">.</span><span class="n">attrib</span><span class="p">[</span><span class="s1">&#39;file&#39;</span><span class="p">]</span>
  440. <span class="n">mol_class</span> <span class="o">=</span> <span class="n">graph</span><span class="o">.</span><span class="n">attrib</span><span class="p">[</span><span class="s1">&#39;class&#39;</span><span class="p">]</span>
  441. <span class="n">data</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">loadGXL</span><span class="p">(</span><span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">mol_filename</span><span class="p">))</span>
  442. <span class="n">y</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">mol_class</span><span class="p">)</span>
  443. <span class="k">elif</span> <span class="n">extension</span> <span class="o">==</span> <span class="s1">&#39;xml&#39;</span><span class="p">:</span>
  444. <span class="n">data</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">loadFromXML</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="n">extra_params</span><span class="p">)</span>
  445. <span class="k">elif</span> <span class="n">extension</span> <span class="o">==</span> <span class="s2">&quot;sdf&quot;</span><span class="p">:</span>
  446. <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
  447. <span class="kn">from</span> <span class="nn">tqdm</span> <span class="k">import</span> <span class="n">tqdm</span>
  448. <span class="kn">import</span> <span class="nn">sys</span>
  449. <span class="n">data</span> <span class="o">=</span> <span class="n">loadSDF</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
  450. <span class="n">y_raw</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">filename_y</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">splitlines</span><span class="p">()</span>
  451. <span class="n">y_raw</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
  452. <span class="n">tmp0</span> <span class="o">=</span> <span class="p">[]</span>
  453. <span class="n">tmp1</span> <span class="o">=</span> <span class="p">[]</span>
  454. <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">y_raw</span><span class="p">)):</span>
  455. <span class="n">tmp</span> <span class="o">=</span> <span class="n">y_raw</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;,&#39;</span><span class="p">)</span>
  456. <span class="n">tmp0</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
  457. <span class="n">tmp1</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span>
  458. <span class="n">y</span> <span class="o">=</span> <span class="p">[]</span>
  459. <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)),</span> <span class="n">desc</span><span class="o">=</span><span class="s1">&#39;ajust data&#39;</span><span class="p">,</span> <span class="n">file</span><span class="o">=</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">):</span>
  460. <span class="k">try</span><span class="p">:</span>
  461. <span class="n">y</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">tmp1</span><span class="p">[</span><span class="n">tmp0</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">data</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">name</span><span class="p">)]</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span>
  462. <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span> <span class="c1"># if data[i].name not in tmp0</span>
  463. <span class="n">data</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
  464. <span class="n">data</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">filter</span><span class="p">(</span><span class="k">lambda</span> <span class="n">a</span><span class="p">:</span> <span class="n">a</span> <span class="o">!=</span> <span class="p">[],</span> <span class="n">data</span><span class="p">))</span>
  465. <span class="k">elif</span> <span class="n">extension</span> <span class="o">==</span> <span class="s2">&quot;mat&quot;</span><span class="p">:</span>
  466. <span class="n">data</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">loadMAT</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="n">extra_params</span><span class="p">)</span>
  467. <span class="k">elif</span> <span class="n">extension</span> <span class="o">==</span> <span class="s1">&#39;txt&#39;</span><span class="p">:</span>
  468. <span class="n">dirname_dataset</span> <span class="o">=</span> <span class="n">dirname</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
  469. <span class="n">data</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">loadTXT</span><span class="p">(</span><span class="n">dirname_dataset</span><span class="p">)</span>
  470. <span class="c1"># print(len(y))</span>
  471. <span class="c1"># print(y)</span>
  472. <span class="c1"># print(data[0].nodes(data=True))</span>
  473. <span class="c1"># print(&#39;----&#39;)</span>
  474. <span class="c1"># print(data[0].edges(data=True))</span>
  475. <span class="c1"># for g in data:</span>
  476. <span class="c1"># print(g.nodes(data=True))</span>
  477. <span class="c1"># print(&#39;----&#39;)</span>
  478. <span class="c1"># print(g.edges(data=True))</span>
  479. <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">y</span></div>
  480. <div class="viewcode-block" id="loadFromXML"><a class="viewcode-back" href="../../../pygraph.utils.html#pygraph.utils.graphfiles.loadFromXML">[docs]</a><span class="k">def</span> <span class="nf">loadFromXML</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="n">extra_params</span><span class="p">):</span>
  481. <span class="kn">import</span> <span class="nn">xml.etree.ElementTree</span> <span class="k">as</span> <span class="nn">ET</span>
  482. <span class="k">if</span> <span class="n">extra_params</span><span class="p">:</span>
  483. <span class="n">dirname_dataset</span> <span class="o">=</span> <span class="n">extra_params</span>
  484. <span class="k">else</span><span class="p">:</span>
  485. <span class="n">dirname_dataset</span> <span class="o">=</span> <span class="n">dirname</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
  486. <span class="n">tree</span> <span class="o">=</span> <span class="n">ET</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
  487. <span class="n">root</span> <span class="o">=</span> <span class="n">tree</span><span class="o">.</span><span class="n">getroot</span><span class="p">()</span>
  488. <span class="n">data</span> <span class="o">=</span> <span class="p">[]</span>
  489. <span class="n">y</span> <span class="o">=</span> <span class="p">[]</span>
  490. <span class="k">for</span> <span class="n">graph</span> <span class="ow">in</span> <span class="n">root</span><span class="o">.</span><span class="n">iter</span><span class="p">(</span><span class="s1">&#39;graph&#39;</span><span class="p">):</span>
  491. <span class="n">mol_filename</span> <span class="o">=</span> <span class="n">graph</span><span class="o">.</span><span class="n">attrib</span><span class="p">[</span><span class="s1">&#39;file&#39;</span><span class="p">]</span>
  492. <span class="n">mol_class</span> <span class="o">=</span> <span class="n">graph</span><span class="o">.</span><span class="n">attrib</span><span class="p">[</span><span class="s1">&#39;class&#39;</span><span class="p">]</span>
  493. <span class="n">data</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">loadGXL</span><span class="p">(</span><span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">mol_filename</span><span class="p">))</span>
  494. <span class="n">y</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">mol_class</span><span class="p">)</span>
  495. <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">y</span></div>
  496. <div class="viewcode-block" id="loadFromDS"><a class="viewcode-back" href="../../../pygraph.utils.html#pygraph.utils.graphfiles.loadFromDS">[docs]</a><span class="k">def</span> <span class="nf">loadFromDS</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="n">filename_y</span><span class="p">):</span>
  497. <span class="sd">&quot;&quot;&quot;Load data from .ds file.</span>
  498. <span class="sd"> Possible graph formats include:</span>
  499. <span class="sd"> &#39;.ct&#39;: see function loadCT for detail.</span>
  500. <span class="sd"> &#39;.gxl&#39;: see function loadGXL for detail.</span>
  501. <span class="sd"> Note these graph formats are checked automatically by the extensions of </span>
  502. <span class="sd"> graph files.</span>
  503. <span class="sd"> &quot;&quot;&quot;</span>
  504. <span class="n">dirname_dataset</span> <span class="o">=</span> <span class="n">dirname</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
  505. <span class="n">data</span> <span class="o">=</span> <span class="p">[]</span>
  506. <span class="n">y</span> <span class="o">=</span> <span class="p">[]</span>
  507. <span class="n">content</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">splitlines</span><span class="p">()</span>
  508. <span class="n">extension</span> <span class="o">=</span> <span class="n">splitext</span><span class="p">(</span><span class="n">content</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39; &#39;</span><span class="p">)[</span><span class="mi">0</span><span class="p">])[</span><span class="mi">1</span><span class="p">][</span><span class="mi">1</span><span class="p">:]</span>
  509. <span class="k">if</span> <span class="n">filename_y</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">filename_y</span> <span class="o">==</span> <span class="s1">&#39;&#39;</span><span class="p">:</span>
  510. <span class="k">if</span> <span class="n">extension</span> <span class="o">==</span> <span class="s1">&#39;ct&#39;</span><span class="p">:</span>
  511. <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">content</span><span class="p">)):</span>
  512. <span class="n">tmp</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39; &#39;</span><span class="p">)</span>
  513. <span class="c1"># remove the &#39;#&#39;s in file names</span>
  514. <span class="n">data</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
  515. <span class="n">loadCT</span><span class="p">(</span><span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">tmp</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;#&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">,</span> <span class="mi">1</span><span class="p">)))</span>
  516. <span class="n">y</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span>
  517. <span class="k">elif</span> <span class="n">extension</span> <span class="o">==</span> <span class="s1">&#39;gxl&#39;</span><span class="p">:</span>
  518. <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">content</span><span class="p">)):</span>
  519. <span class="n">tmp</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39; &#39;</span><span class="p">)</span>
  520. <span class="c1"># remove the &#39;#&#39;s in file names</span>
  521. <span class="n">data</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
  522. <span class="n">loadGXL</span><span class="p">(</span><span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">tmp</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;#&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">,</span> <span class="mi">1</span><span class="p">)))</span>
  523. <span class="n">y</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span>
  524. <span class="k">else</span><span class="p">:</span> <span class="c1"># y in a separate file</span>
  525. <span class="k">if</span> <span class="n">extension</span> <span class="o">==</span> <span class="s1">&#39;ct&#39;</span><span class="p">:</span>
  526. <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">content</span><span class="p">)):</span>
  527. <span class="n">tmp</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
  528. <span class="c1"># remove the &#39;#&#39;s in file names</span>
  529. <span class="n">data</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
  530. <span class="n">loadCT</span><span class="p">(</span><span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">tmp</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;#&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">,</span> <span class="mi">1</span><span class="p">)))</span>
  531. <span class="k">elif</span> <span class="n">extension</span> <span class="o">==</span> <span class="s1">&#39;gxl&#39;</span><span class="p">:</span>
  532. <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">content</span><span class="p">)):</span>
  533. <span class="n">tmp</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
  534. <span class="c1"># remove the &#39;#&#39;s in file names</span>
  535. <span class="n">data</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
  536. <span class="n">loadGXL</span><span class="p">(</span><span class="n">dirname_dataset</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">tmp</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;#&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">,</span> <span class="mi">1</span><span class="p">)))</span>
  537. <span class="n">content_y</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">filename_y</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">splitlines</span><span class="p">()</span>
  538. <span class="c1"># assume entries in filename and filename_y have the same order.</span>
  539. <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">content_y</span><span class="p">:</span>
  540. <span class="n">tmp</span> <span class="o">=</span> <span class="n">item</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39; &#39;</span><span class="p">)</span>
  541. <span class="c1"># assume the 3rd entry in a line is y (for Alkane dataset)</span>
  542. <span class="n">y</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="n">tmp</span><span class="p">[</span><span class="mi">2</span><span class="p">]))</span>
  543. <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">y</span></div>
  544. <div class="viewcode-block" id="saveDataset"><a class="viewcode-back" href="../../../pygraph.utils.html#pygraph.utils.graphfiles.saveDataset">[docs]</a><span class="k">def</span> <span class="nf">saveDataset</span><span class="p">(</span><span class="n">Gn</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">gformat</span><span class="o">=</span><span class="s1">&#39;gxl&#39;</span><span class="p">,</span> <span class="n">group</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">filename</span><span class="o">=</span><span class="s1">&#39;gfile&#39;</span><span class="p">,</span> <span class="n">xparams</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
  545. <span class="sd">&quot;&quot;&quot;Save list of graphs.</span>
  546. <span class="sd"> &quot;&quot;&quot;</span>
  547. <span class="kn">import</span> <span class="nn">os</span>
  548. <span class="n">dirname_ds</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
  549. <span class="k">if</span> <span class="n">dirname_ds</span> <span class="o">!=</span> <span class="s1">&#39;&#39;</span><span class="p">:</span>
  550. <span class="n">dirname_ds</span> <span class="o">+=</span> <span class="s1">&#39;/&#39;</span>
  551. <span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">dirname_ds</span><span class="p">)</span> <span class="p">:</span>
  552. <span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">dirname_ds</span><span class="p">)</span>
  553. <span class="k">if</span> <span class="s1">&#39;graph_dir&#39;</span> <span class="ow">in</span> <span class="n">xparams</span><span class="p">:</span>
  554. <span class="n">graph_dir</span> <span class="o">=</span> <span class="n">xparams</span><span class="p">[</span><span class="s1">&#39;graph_dir&#39;</span><span class="p">]</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span>
  555. <span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">graph_dir</span><span class="p">):</span>
  556. <span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">graph_dir</span><span class="p">)</span>
  557. <span class="k">else</span><span class="p">:</span>
  558. <span class="n">graph_dir</span> <span class="o">=</span> <span class="n">dirname_ds</span>
  559. <span class="k">if</span> <span class="n">group</span> <span class="o">==</span> <span class="s1">&#39;xml&#39;</span> <span class="ow">and</span> <span class="n">gformat</span> <span class="o">==</span> <span class="s1">&#39;gxl&#39;</span><span class="p">:</span>
  560. <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">filename</span> <span class="o">+</span> <span class="s1">&#39;.xml&#39;</span><span class="p">,</span> <span class="s1">&#39;w&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">fgroup</span><span class="p">:</span>
  561. <span class="n">fgroup</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;&lt;?xml version=</span><span class="se">\&quot;</span><span class="s2">1.0</span><span class="se">\&quot;</span><span class="s2">?&gt;&quot;</span><span class="p">)</span>
  562. <span class="n">fgroup</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;</span><span class="se">\n</span><span class="s2">&lt;!DOCTYPE GraphCollection SYSTEM </span><span class="se">\&quot;</span><span class="s2">http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd</span><span class="se">\&quot;</span><span class="s2">&gt;&quot;</span><span class="p">)</span>
  563. <span class="n">fgroup</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;</span><span class="se">\n</span><span class="s2">&lt;GraphCollection&gt;&quot;</span><span class="p">)</span>
  564. <span class="k">for</span> <span class="n">idx</span><span class="p">,</span> <span class="n">g</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">Gn</span><span class="p">):</span>
  565. <span class="n">fname_tmp</span> <span class="o">=</span> <span class="s2">&quot;graph&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">idx</span><span class="p">)</span> <span class="o">+</span> <span class="s2">&quot;.gxl&quot;</span>
  566. <span class="n">saveGXL</span><span class="p">(</span><span class="n">g</span><span class="p">,</span> <span class="n">graph_dir</span> <span class="o">+</span> <span class="n">fname_tmp</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="n">xparams</span><span class="p">[</span><span class="s1">&#39;method&#39;</span><span class="p">])</span>
  567. <span class="n">fgroup</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;</span><span class="se">\n\t</span><span class="s2">&lt;graph file=</span><span class="se">\&quot;</span><span class="s2">&quot;</span> <span class="o">+</span> <span class="n">fname_tmp</span> <span class="o">+</span> <span class="s2">&quot;</span><span class="se">\&quot;</span><span class="s2"> class=</span><span class="se">\&quot;</span><span class="s2">&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">y</span><span class="p">[</span><span class="n">idx</span><span class="p">])</span> <span class="o">+</span> <span class="s2">&quot;</span><span class="se">\&quot;</span><span class="s2">/&gt;&quot;</span><span class="p">)</span>
  568. <span class="n">fgroup</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">&quot;</span><span class="se">\n</span><span class="s2">&lt;/GraphCollection&gt;&quot;</span><span class="p">)</span>
  569. <span class="n">fgroup</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div>
  570. <span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
  571. <span class="c1"># ### Load dataset from .ds file.</span>
  572. <span class="c1"># # .ct files.</span>
  573. <span class="c1"># ds = {&#39;name&#39;: &#39;Alkane&#39;, &#39;dataset&#39;: &#39;../../datasets/Alkane/dataset.ds&#39;,</span>
  574. <span class="c1"># &#39;dataset_y&#39;: &#39;../../datasets/Alkane/dataset_boiling_point_names.txt&#39;}</span>
  575. <span class="c1"># Gn, y = loadDataset(ds[&#39;dataset&#39;], filename_y=ds[&#39;dataset_y&#39;])</span>
  576. <span class="c1">## ds = {&#39;name&#39;: &#39;Acyclic&#39;, &#39;dataset&#39;: &#39;../../datasets/acyclic/dataset_bps.ds&#39;} # node symb</span>
  577. <span class="c1">## Gn, y = loadDataset(ds[&#39;dataset&#39;])</span>
  578. <span class="c1">## ds = {&#39;name&#39;: &#39;MAO&#39;, &#39;dataset&#39;: &#39;../../datasets/MAO/dataset.ds&#39;} # node/edge symb</span>
  579. <span class="c1">## Gn, y = loadDataset(ds[&#39;dataset&#39;])</span>
  580. <span class="c1">## ds = {&#39;name&#39;: &#39;PAH&#39;, &#39;dataset&#39;: &#39;../../datasets/PAH/dataset.ds&#39;} # unlabeled</span>
  581. <span class="c1">## Gn, y = loadDataset(ds[&#39;dataset&#39;])</span>
  582. <span class="c1"># print(Gn[1].nodes(data=True))</span>
  583. <span class="c1"># print(Gn[1].edges(data=True))</span>
  584. <span class="c1"># print(y[1])</span>
  585. <span class="c1"># # .gxl file.</span>
  586. <span class="c1"># ds = {&#39;name&#39;: &#39;monoterpenoides&#39;, </span>
  587. <span class="c1"># &#39;dataset&#39;: &#39;../../datasets/monoterpenoides/dataset_10+.ds&#39;} # node/edge symb</span>
  588. <span class="c1"># Gn, y = loadDataset(ds[&#39;dataset&#39;])</span>
  589. <span class="c1"># print(Gn[1].nodes(data=True))</span>
  590. <span class="c1"># print(Gn[1].edges(data=True))</span>
  591. <span class="c1"># print(y[1])</span>
  592. <span class="c1">### Convert graph from one format to another.</span>
  593. <span class="c1"># .gxl file.</span>
  594. <span class="kn">import</span> <span class="nn">networkx</span> <span class="k">as</span> <span class="nn">nx</span>
  595. <span class="n">ds</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="s1">&#39;monoterpenoides&#39;</span><span class="p">,</span>
  596. <span class="s1">&#39;dataset&#39;</span><span class="p">:</span> <span class="s1">&#39;../../datasets/monoterpenoides/dataset_10+.ds&#39;</span><span class="p">}</span> <span class="c1"># node/edge symb</span>
  597. <span class="n">Gn</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">loadDataset</span><span class="p">(</span><span class="n">ds</span><span class="p">[</span><span class="s1">&#39;dataset&#39;</span><span class="p">])</span>
  598. <span class="n">y</span> <span class="o">=</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">y</span><span class="p">]</span>
  599. <span class="nb">print</span><span class="p">(</span><span class="n">Gn</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">nodes</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span>
  600. <span class="nb">print</span><span class="p">(</span><span class="n">Gn</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">edges</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span>
  601. <span class="nb">print</span><span class="p">(</span><span class="n">y</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
  602. <span class="c1"># Convert a graph to the proper NetworkX format that can be recognized by library gedlib.</span>
  603. <span class="n">Gn_new</span> <span class="o">=</span> <span class="p">[]</span>
  604. <span class="k">for</span> <span class="n">G</span> <span class="ow">in</span> <span class="n">Gn</span><span class="p">:</span>
  605. <span class="n">G_new</span> <span class="o">=</span> <span class="n">nx</span><span class="o">.</span><span class="n">Graph</span><span class="p">()</span>
  606. <span class="k">for</span> <span class="n">nd</span><span class="p">,</span> <span class="n">attrs</span> <span class="ow">in</span> <span class="n">G</span><span class="o">.</span><span class="n">nodes</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
  607. <span class="n">G_new</span><span class="o">.</span><span class="n">add_node</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">nd</span><span class="p">),</span> <span class="n">chem</span><span class="o">=</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;atom&#39;</span><span class="p">])</span>
  608. <span class="k">for</span> <span class="n">nd1</span><span class="p">,</span> <span class="n">nd2</span><span class="p">,</span> <span class="n">attrs</span> <span class="ow">in</span> <span class="n">G</span><span class="o">.</span><span class="n">edges</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
  609. <span class="n">G_new</span><span class="o">.</span><span class="n">add_edge</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">nd1</span><span class="p">),</span> <span class="nb">str</span><span class="p">(</span><span class="n">nd2</span><span class="p">),</span> <span class="n">valence</span><span class="o">=</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;bond_type&#39;</span><span class="p">])</span>
  610. <span class="c1"># G_new.add_edge(str(nd1), str(nd2))</span>
  611. <span class="n">Gn_new</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">G_new</span><span class="p">)</span>
  612. <span class="nb">print</span><span class="p">(</span><span class="n">Gn_new</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">nodes</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span>
  613. <span class="nb">print</span><span class="p">(</span><span class="n">Gn_new</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">edges</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span>
  614. <span class="nb">print</span><span class="p">(</span><span class="n">Gn_new</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
  615. <span class="n">filename</span> <span class="o">=</span> <span class="s1">&#39;/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/gxl/monoterpenoides&#39;</span>
  616. <span class="n">xparams</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;method&#39;</span><span class="p">:</span> <span class="s1">&#39;gedlib&#39;</span><span class="p">}</span>
  617. <span class="n">saveDataset</span><span class="p">(</span><span class="n">Gn</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">gformat</span><span class="o">=</span><span class="s1">&#39;gxl&#39;</span><span class="p">,</span> <span class="n">group</span><span class="o">=</span><span class="s1">&#39;xml&#39;</span><span class="p">,</span> <span class="n">filename</span><span class="o">=</span><span class="n">filename</span><span class="p">,</span> <span class="n">xparams</span><span class="o">=</span><span class="n">xparams</span><span class="p">)</span>
  618. <span class="c1"># ds = {&#39;name&#39;: &#39;MUTAG&#39;, &#39;dataset&#39;: &#39;../../datasets/MUTAG/MUTAG.mat&#39;,</span>
  619. <span class="c1"># &#39;extra_params&#39;: {&#39;am_sp_al_nl_el&#39;: [0, 0, 3, 1, 2]}} # node/edge symb</span>
  620. <span class="c1"># Gn, y = loadDataset(ds[&#39;dataset&#39;], extra_params=ds[&#39;extra_params&#39;])</span>
  621. <span class="c1"># saveDataset(Gn, y, group=&#39;xml&#39;, filename=&#39;temp/temp&#39;)</span>
  622. </pre></div>
  623. </div>
  624. </div>
  625. <footer>
  626. <hr/>
  627. <div role="contentinfo">
  628. <p>
  629. &copy; Copyright 2020, Linlin Jia
  630. </p>
  631. </div>
  632. Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
  633. </footer>
  634. </div>
  635. </div>
  636. </section>
  637. </div>
  638. <script type="text/javascript">
  639. jQuery(function () {
  640. SphinxRtdTheme.Navigation.enable(true);
  641. });
  642. </script>
  643. </body>
  644. </html>

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.