You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

get_dataset_attributes.ipynb 21 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {
  7. "scrolled": false
  8. },
  9. "outputs": [
  10. {
  11. "name": "stdout",
  12. "output_type": "stream",
  13. "text": [
  14. "\n",
  15. "Letter-med:\n",
  16. "substructures : {'linear', 'non linear'}\n",
  17. "node_labeled : False\n",
  18. "edge_labeled : False\n",
  19. "is_directed : False\n",
  20. "dataset_size : 2250\n",
  21. "ave_graph_size : 4.674666666666667\n",
  22. "min_graph_size : 1\n",
  23. "max_graph_size : 9\n",
  24. "ave_graph_edge_num : 3.2057777777777776\n",
  25. "min_graph_edge_num : 0\n",
  26. "max_graph_edge_num : 7\n",
  27. "ave_graph_degree : 2.012888888888889\n",
  28. "min_graph_degree : 0\n",
  29. "max_graph_degree : 4\n",
  30. "node_label_num : 0\n",
  31. "edge_label_num : 0\n",
  32. "node_attr_dim : 2\n",
  33. "edge_attr_dim : 0\n",
  34. "class_number : 15\n",
  35. "\n",
  36. "\n",
  37. "Mutagenicity:\n",
  38. "substructures : {'linear', 'non linear'}\n",
  39. "node_labeled : True\n",
  40. "edge_labeled : True\n",
  41. "is_directed : False\n",
  42. "dataset_size : 4337\n",
  43. "ave_graph_size : 30.317731150564907\n",
  44. "min_graph_size : 4\n",
  45. "max_graph_size : 417\n",
  46. "ave_graph_edge_num : 30.76942587041734\n",
  47. "min_graph_edge_num : 3\n",
  48. "max_graph_edge_num : 112\n",
  49. "ave_graph_degree : 3.75651371916071\n",
  50. "min_graph_degree : 3\n",
  51. "max_graph_degree : 4\n",
  52. "node_label_num : 14\n",
  53. "edge_label_num : 3\n",
  54. "node_attr_dim : 0\n",
  55. "edge_attr_dim : 0\n",
  56. "class_number : 2\n",
  57. "\n",
  58. "\n",
  59. "AIDS:\n",
  60. "substructures : {'linear', 'non linear'}\n",
  61. "node_labeled : True\n",
  62. "edge_labeled : True\n",
  63. "is_directed : False\n",
  64. "dataset_size : 2000\n",
  65. "ave_graph_size : 15.6925\n",
  66. "min_graph_size : 2\n",
  67. "max_graph_size : 95\n",
  68. "ave_graph_edge_num : 16.195\n",
  69. "min_graph_edge_num : 1\n",
  70. "max_graph_edge_num : 103\n",
  71. "ave_graph_degree : 3.322\n",
  72. "min_graph_degree : 1\n",
  73. "max_graph_degree : 6\n",
  74. "node_label_num : 38\n",
  75. "edge_label_num : 3\n",
  76. "node_attr_dim : 4\n",
  77. "edge_attr_dim : 0\n",
  78. "class_number : 2\n",
  79. "\n",
  80. "\n",
  81. "FIRSTMM_DB:\n",
  82. "substructures : {'non linear'}\n",
  83. "node_labeled : True\n",
  84. "edge_labeled : False\n",
  85. "is_directed : False\n",
  86. "dataset_size : 41\n",
  87. "ave_graph_size : 1377.2682926829268\n",
  88. "min_graph_size : 134\n",
  89. "max_graph_size : 5037\n",
  90. "ave_graph_edge_num : 3074.0975609756097\n",
  91. "min_graph_edge_num : 320\n",
  92. "max_graph_edge_num : 10888\n",
  93. "ave_graph_degree : 7.853658536585366\n",
  94. "min_graph_degree : 6\n",
  95. "max_graph_degree : 10\n",
  96. "node_label_num : 5\n",
  97. "edge_label_num : 0\n",
  98. "node_attr_dim : 1\n",
  99. "edge_attr_dim : 2\n",
  100. "class_number : 11\n",
  101. "\n",
  102. "\n",
  103. "MSRC9:\n",
  104. "substructures : {'linear', 'non linear'}\n",
  105. "node_labeled : True\n",
  106. "edge_labeled : False\n",
  107. "is_directed : False\n",
  108. "dataset_size : 221\n",
  109. "ave_graph_size : 40.57918552036199\n",
  110. "min_graph_size : 25\n",
  111. "max_graph_size : 55\n",
  112. "ave_graph_edge_num : 97.9366515837104\n",
  113. "min_graph_edge_num : 53\n",
  114. "max_graph_edge_num : 145\n",
  115. "ave_graph_degree : 10.158371040723981\n",
  116. "min_graph_degree : 8\n",
  117. "max_graph_degree : 16\n",
  118. "node_label_num : 10\n",
  119. "edge_label_num : 0\n",
  120. "node_attr_dim : 0\n",
  121. "edge_attr_dim : 0\n",
  122. "class_number : 8\n",
  123. "\n",
  124. "\n",
  125. "MSRC21:\n",
  126. "substructures : {'linear', 'non linear'}\n",
  127. "node_labeled : True\n",
  128. "edge_labeled : False\n",
  129. "is_directed : False\n",
  130. "dataset_size : 563\n",
  131. "ave_graph_size : 77.52042628774423\n",
  132. "min_graph_size : 51\n",
  133. "max_graph_size : 141\n",
  134. "ave_graph_edge_num : 198.32326820603907\n",
  135. "min_graph_edge_num : 121\n",
  136. "max_graph_edge_num : 405\n",
  137. "ave_graph_degree : 11.41563055062167\n",
  138. "min_graph_degree : 8\n",
  139. "max_graph_degree : 23\n",
  140. "node_label_num : 22\n",
  141. "edge_label_num : 0\n",
  142. "node_attr_dim : 0\n",
  143. "edge_attr_dim : 0\n",
  144. "class_number : 20\n",
  145. "\n",
  146. "\n",
  147. "SYNTHETIC:\n",
  148. "substructures : {'linear', 'non linear'}\n",
  149. "node_labeled : True\n",
  150. "edge_labeled : False\n",
  151. "is_directed : False\n",
  152. "dataset_size : 300\n",
  153. "ave_graph_size : 100.0\n",
  154. "min_graph_size : 100\n",
  155. "max_graph_size : 100\n",
  156. "ave_graph_edge_num : 196.0\n",
  157. "min_graph_edge_num : 196\n",
  158. "max_graph_edge_num : 196\n",
  159. "ave_graph_degree : 8.0\n",
  160. "min_graph_degree : 8\n",
  161. "max_graph_degree : 8\n",
  162. "node_label_num : 8\n",
  163. "edge_label_num : 0\n",
  164. "node_attr_dim : 1\n",
  165. "edge_attr_dim : 0\n",
  166. "class_number : 2\n",
  167. "\n",
  168. "\n",
  169. "BZR:\n",
  170. "substructures : {'linear', 'non linear'}\n",
  171. "node_labeled : True\n",
  172. "edge_labeled : False\n",
  173. "is_directed : False\n",
  174. "dataset_size : 405\n",
  175. "ave_graph_size : 35.75061728395062\n",
  176. "min_graph_size : 13\n",
  177. "max_graph_size : 57\n",
  178. "ave_graph_edge_num : 38.358024691358025\n",
  179. "min_graph_edge_num : 13\n",
  180. "max_graph_edge_num : 60\n",
  181. "ave_graph_degree : 3.8641975308641974\n",
  182. "min_graph_degree : 3\n",
  183. "max_graph_degree : 4\n",
  184. "node_label_num : 10\n",
  185. "edge_label_num : 0\n",
  186. "node_attr_dim : 3\n",
  187. "edge_attr_dim : 0\n",
  188. "class_number : 2\n",
  189. "\n",
  190. "\n",
  191. "COX2:\n",
  192. "substructures : {'linear', 'non linear'}\n",
  193. "node_labeled : True\n",
  194. "edge_labeled : False\n",
  195. "is_directed : False\n",
  196. "dataset_size : 467\n",
  197. "ave_graph_size : 41.224839400428266\n",
  198. "min_graph_size : 32\n",
  199. "max_graph_size : 56\n",
  200. "ave_graph_edge_num : 43.44539614561028\n",
  201. "min_graph_edge_num : 34\n",
  202. "max_graph_edge_num : 59\n",
  203. "ave_graph_degree : 4.0\n",
  204. "min_graph_degree : 4\n",
  205. "max_graph_degree : 4\n",
  206. "node_label_num : 8\n",
  207. "edge_label_num : 0\n",
  208. "node_attr_dim : 3\n",
  209. "edge_attr_dim : 0\n",
  210. "class_number : 2\n",
  211. "\n",
  212. "\n",
  213. "DHFR:\n",
  214. "substructures : {'linear', 'non linear'}\n",
  215. "node_labeled : True\n",
  216. "edge_labeled : False\n",
  217. "is_directed : False\n",
  218. "dataset_size : 756\n",
  219. "ave_graph_size : 42.42724867724868\n",
  220. "min_graph_size : 20\n",
  221. "max_graph_size : 71\n",
  222. "ave_graph_edge_num : 44.544973544973544\n",
  223. "min_graph_edge_num : 21\n",
  224. "max_graph_edge_num : 73\n",
  225. "ave_graph_degree : 3.955026455026455\n",
  226. "min_graph_degree : 3\n",
  227. "max_graph_degree : 4\n",
  228. "node_label_num : 9\n",
  229. "edge_label_num : 0\n",
  230. "node_attr_dim : 3\n",
  231. "edge_attr_dim : 0\n",
  232. "class_number : 2\n",
  233. "\n",
  234. "\n",
  235. "ENZYMES:\n",
  236. "substructures : {'linear', 'non linear'}\n",
  237. "node_labeled : True\n",
  238. "edge_labeled : False\n",
  239. "is_directed : False\n",
  240. "dataset_size : 600\n",
  241. "ave_graph_size : 32.63333333333333\n",
  242. "min_graph_size : 2\n",
  243. "max_graph_size : 126\n",
  244. "ave_graph_edge_num : 62.13666666666666\n",
  245. "min_graph_edge_num : 1\n",
  246. "max_graph_edge_num : 149\n",
  247. "ave_graph_degree : 6.086666666666667\n",
  248. "min_graph_degree : 1\n",
  249. "max_graph_degree : 9\n",
  250. "node_label_num : 3\n",
  251. "edge_label_num : 0\n",
  252. "node_attr_dim : 18\n",
  253. "edge_attr_dim : 0\n",
  254. "class_number : 6\n",
  255. "\n",
  256. "\n",
  257. "PROTEINS:\n",
  258. "substructures : {'linear', 'non linear'}\n",
  259. "node_labeled : True\n",
  260. "edge_labeled : False\n",
  261. "is_directed : False\n",
  262. "dataset_size : 1113\n",
  263. "ave_graph_size : 39.05750224618149\n",
  264. "min_graph_size : 4\n",
  265. "max_graph_size : 620\n",
  266. "ave_graph_edge_num : 72.8158131176999\n",
  267. "min_graph_edge_num : 5\n",
  268. "max_graph_edge_num : 1049\n",
  269. "ave_graph_degree : 5.794249775381851\n",
  270. "min_graph_degree : 3\n",
  271. "max_graph_degree : 25\n",
  272. "node_label_num : 3\n",
  273. "edge_label_num : 0\n",
  274. "node_attr_dim : 1\n",
  275. "edge_attr_dim : 0\n",
  276. "class_number : 2\n",
  277. "\n",
  278. "\n",
  279. "PROTEINS_full:\n",
  280. "substructures : {'linear', 'non linear'}\n",
  281. "node_labeled : True\n",
  282. "edge_labeled : False\n",
  283. "is_directed : False\n",
  284. "dataset_size : 1113\n",
  285. "ave_graph_size : 39.05750224618149\n",
  286. "min_graph_size : 4\n",
  287. "max_graph_size : 620\n",
  288. "ave_graph_edge_num : 72.8158131176999\n",
  289. "min_graph_edge_num : 5\n",
  290. "max_graph_edge_num : 1049\n",
  291. "ave_graph_degree : 5.794249775381851\n",
  292. "min_graph_degree : 3\n",
  293. "max_graph_degree : 25\n",
  294. "node_label_num : 3\n",
  295. "edge_label_num : 0\n",
  296. "node_attr_dim : 29\n",
  297. "edge_attr_dim : 0\n",
  298. "class_number : 2\n",
  299. "\n",
  300. "\n",
  301. "D&D:\n",
  302. "substructures : {'linear', 'non linear'}\n",
  303. "node_labeled : True\n",
  304. "edge_labeled : False\n",
  305. "is_directed : False\n",
  306. "dataset_size : 1178\n",
  307. "ave_graph_size : 284.3166383701188\n",
  308. "min_graph_size : 30\n",
  309. "max_graph_size : 5748\n",
  310. "ave_graph_edge_num : 715.6587436332767\n",
  311. "min_graph_edge_num : 63\n",
  312. "max_graph_edge_num : 14267\n",
  313. "ave_graph_degree : 9.509337860780985\n",
  314. "min_graph_degree : 6\n",
  315. "max_graph_degree : 19\n",
  316. "node_label_num : 82\n",
  317. "edge_label_num : 0\n",
  318. "node_attr_dim : 0\n",
  319. "edge_attr_dim : 0\n",
  320. "class_number : 2\n",
  321. "\n",
  322. "\n",
  323. "MUTAG:\n",
  324. "substructures : {'linear', 'non linear'}\n",
  325. "node_labeled : True\n",
  326. "edge_labeled : True\n",
  327. "is_directed : False\n",
  328. "dataset_size : 188\n",
  329. "ave_graph_size : 17.930851063829788\n",
  330. "min_graph_size : 10\n",
  331. "max_graph_size : 28\n",
  332. "ave_graph_edge_num : 19.79255319148936\n",
  333. "min_graph_edge_num : 10\n",
  334. "max_graph_edge_num : 33\n",
  335. "ave_graph_degree : 3.00531914893617\n",
  336. "min_graph_degree : 3\n",
  337. "max_graph_degree : 4\n",
  338. "node_label_num : 7\n",
  339. "edge_label_num : 11\n",
  340. "node_attr_dim : 0\n",
  341. "edge_attr_dim : 0\n",
  342. "class_number : 2\n",
  343. "\n",
  344. "\n",
  345. "Alkane:\n",
  346. "substructures : {'linear', 'non linear'}\n",
  347. "node_labeled : True\n",
  348. "edge_labeled : False\n",
  349. "is_directed : False\n",
  350. "dataset_size : 150\n",
  351. "ave_graph_size : 8.873333333333333\n",
  352. "min_graph_size : 1\n",
  353. "max_graph_size : 10\n",
  354. "ave_graph_edge_num : 7.873333333333333\n",
  355. "min_graph_edge_num : 0\n",
  356. "max_graph_edge_num : 9\n",
  357. "ave_graph_degree : 3.36\n",
  358. "min_graph_degree : 0\n",
  359. "max_graph_degree : 4\n",
  360. "node_label_num : 2\n",
  361. "edge_label_num : 1\n",
  362. "node_attr_dim : 0\n",
  363. "edge_attr_dim : 0\n",
  364. "class_number : 123\n",
  365. "\n",
  366. "\n",
  367. "Acyclic:\n",
  368. "substructures : {'linear', 'non linear'}\n",
  369. "node_labeled : True\n",
  370. "edge_labeled : False\n",
  371. "is_directed : False\n",
  372. "dataset_size : 183\n",
  373. "ave_graph_size : 8.153005464480874\n",
  374. "min_graph_size : 3\n",
  375. "max_graph_size : 11\n",
  376. "ave_graph_edge_num : 7.1530054644808745\n",
  377. "min_graph_edge_num : 2\n",
  378. "max_graph_edge_num : 10\n",
  379. "ave_graph_degree : 2.80327868852459\n",
  380. "min_graph_degree : 2\n",
  381. "max_graph_degree : 4\n",
  382. "node_label_num : 3\n",
  383. "edge_label_num : 1\n",
  384. "node_attr_dim : 0\n",
  385. "edge_attr_dim : 0\n",
  386. "class_number : 148\n",
  387. "\n",
  388. "\n",
  389. "MAO:\n",
  390. "substructures : {'linear', 'non linear'}\n",
  391. "node_labeled : True\n",
  392. "edge_labeled : True\n",
  393. "is_directed : False\n",
  394. "dataset_size : 68\n",
  395. "ave_graph_size : 18.38235294117647\n",
  396. "min_graph_size : 11\n",
  397. "max_graph_size : 27\n",
  398. "ave_graph_edge_num : 19.63235294117647\n",
  399. "min_graph_edge_num : 12\n",
  400. "max_graph_edge_num : 29\n",
  401. "ave_graph_degree : 3.0\n",
  402. "min_graph_degree : 3\n",
  403. "max_graph_degree : 3\n",
  404. "node_label_num : 3\n",
  405. "edge_label_num : 4\n",
  406. "node_attr_dim : 0\n",
  407. "edge_attr_dim : 0\n",
  408. "class_number : 2\n",
  409. "\n"
  410. ]
  411. },
  412. {
  413. "name": "stdout",
  414. "output_type": "stream",
  415. "text": [
  416. "\n",
  417. "PAH:\n",
  418. "substructures : {'linear', 'non linear'}\n",
  419. "node_labeled : False\n",
  420. "edge_labeled : False\n",
  421. "is_directed : False\n",
  422. "dataset_size : 94\n",
  423. "ave_graph_size : 20.70212765957447\n",
  424. "min_graph_size : 10\n",
  425. "max_graph_size : 28\n",
  426. "ave_graph_edge_num : 24.425531914893618\n",
  427. "min_graph_edge_num : 11\n",
  428. "max_graph_edge_num : 34\n",
  429. "ave_graph_degree : 3.0106382978723403\n",
  430. "min_graph_degree : 3\n",
  431. "max_graph_degree : 4\n",
  432. "node_label_num : 1\n",
  433. "edge_label_num : 1\n",
  434. "node_attr_dim : 0\n",
  435. "edge_attr_dim : 0\n",
  436. "class_number : 2\n",
  437. "\n",
  438. "\n",
  439. "NCI1:\n",
  440. "substructures : {'linear', 'non linear'}\n",
  441. "node_labeled : True\n",
  442. "edge_labeled : False\n",
  443. "is_directed : False\n",
  444. "dataset_size : 4110\n",
  445. "ave_graph_size : 29.8654501216545\n",
  446. "min_graph_size : 3\n",
  447. "max_graph_size : 111\n",
  448. "ave_graph_edge_num : 32.3\n",
  449. "min_graph_edge_num : 2\n",
  450. "max_graph_edge_num : 119\n",
  451. "ave_graph_degree : 3.3360097323600972\n",
  452. "min_graph_degree : 2\n",
  453. "max_graph_degree : 4\n",
  454. "node_label_num : 37\n",
  455. "edge_label_num : 0\n",
  456. "node_attr_dim : 0\n",
  457. "edge_attr_dim : 0\n",
  458. "class_number : 2\n",
  459. "\n",
  460. "\n",
  461. "NCI109:\n",
  462. "substructures : {'linear', 'non linear'}\n",
  463. "node_labeled : True\n",
  464. "edge_labeled : False\n",
  465. "is_directed : False\n",
  466. "dataset_size : 4127\n",
  467. "ave_graph_size : 29.681124303368065\n",
  468. "min_graph_size : 4\n",
  469. "max_graph_size : 111\n",
  470. "ave_graph_edge_num : 32.13084565059365\n",
  471. "min_graph_edge_num : 3\n",
  472. "max_graph_edge_num : 119\n",
  473. "ave_graph_degree : 3.343833292948873\n",
  474. "min_graph_degree : 2\n",
  475. "max_graph_degree : 5\n",
  476. "node_label_num : 38\n",
  477. "edge_label_num : 0\n",
  478. "node_attr_dim : 0\n",
  479. "edge_attr_dim : 0\n",
  480. "class_number : 2\n",
  481. "\n"
  482. ]
  483. },
  484. {
  485. "ename": "ModuleNotFoundError",
  486. "evalue": "No module named 'tqdm'",
  487. "output_type": "error",
  488. "traceback": [
  489. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  490. "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
  491. "\u001b[0;32m<ipython-input-1-1e4da065c026>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'dataset'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0mfilename_y\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'dataset_y'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'dataset_y'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mds\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 47\u001b[0;31m extra_params=(ds['extra_params'] if 'extra_params' in ds else None))\n\u001b[0m\u001b[1;32m 48\u001b[0m attrs = get_dataset_attributes(\n\u001b[1;32m 49\u001b[0m dataset, target=y, node_label='atom', edge_label='bond_type')\n",
  492. "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/utils/graphfiles.py\u001b[0m in \u001b[0;36mloadDataset\u001b[0;34m(filename, filename_y, extra_params)\u001b[0m\n\u001b[1;32m 377\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mextension\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"sdf\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 378\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 379\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mtqdm\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 380\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 381\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  493. "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'tqdm'"
  494. ]
  495. }
  496. ],
  497. "source": [
  498. "import sys\n",
  499. "sys.path.insert(0, \"../\")\n",
  500. "from pygraph.utils.graphfiles import loadDataset\n",
  501. "from pygraph.utils.graphdataset import get_dataset_attributes\n",
  502. "\n",
  503. "dslist = [\n",
  504. " {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n",
  505. " {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n",
  506. " {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},\n",
  507. " {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},\n",
  508. " {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},\n",
  509. " {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},\n",
  510. " {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},\n",
  511. " {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},\n",
  512. " {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},\n",
  513. " {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'},\n",
  514. " {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n",
  515. " {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},\n",
  516. " {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'},\n",
  517. " {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n",
  518. " 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},\n",
  519. " {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n",
  520. " 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},\n",
  521. " {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds',\n",
  522. " 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt',},\n",
  523. " {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',},\n",
  524. " {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds',},\n",
  525. " {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds',},\n",
  526. " {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n",
  527. " 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},\n",
  528. " {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n",
  529. " 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},\n",
  530. " {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n",
  531. " 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',},\n",
  532. "\n",
  533. "# # not working below\n",
  534. "# {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},\n",
  535. "# {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},\n",
  536. "# {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},\n",
  537. "# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},\n",
  538. "]\n",
  539. "\n",
  540. "for ds in dslist:\n",
  541. " dataset, y = loadDataset(\n",
  542. " ds['dataset'],\n",
  543. " filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),\n",
  544. " extra_params=(ds['extra_params'] if 'extra_params' in ds else None))\n",
  545. " attrs = get_dataset_attributes(\n",
  546. " dataset, target=y, node_label='atom', edge_label='bond_type')\n",
  547. " print()\n",
  548. " print(ds['name'] + ':')\n",
  549. " for atr in attrs:\n",
  550. " print(atr, ':', attrs[atr])\n",
  551. " print()"
  552. ]
  553. }
  554. ],
  555. "metadata": {
  556. "kernelspec": {
  557. "display_name": "Python 3",
  558. "language": "python",
  559. "name": "python3"
  560. },
  561. "language_info": {
  562. "codemirror_mode": {
  563. "name": "ipython",
  564. "version": 3
  565. },
  566. "file_extension": ".py",
  567. "mimetype": "text/x-python",
  568. "name": "python",
  569. "nbconvert_exporter": "python",
  570. "pygments_lexer": "ipython3",
  571. "version": "3.6.5"
  572. }
  573. },
  574. "nbformat": 4,
  575. "nbformat_minor": 2
  576. }

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.