You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

get_dataset_attributes.ipynb 19 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {
  7. "scrolled": false
  8. },
  9. "outputs": [
  10. {
  11. "name": "stdout",
  12. "output_type": "stream",
  13. "text": [
  14. "\n",
  15. "Acyclic:\n",
  16. "substructures : {'non linear', 'linear'}\n",
  17. "node_labeled : True\n",
  18. "edge_labeled : False\n",
  19. "is_directed : False\n",
  20. "dataset_size : 183\n",
  21. "ave_node_num : 8.153005464480874\n",
  22. "min_node_num : 3\n",
  23. "max_node_num : 11\n",
  24. "ave_edge_num : 7.1530054644808745\n",
  25. "min_edge_num : 2\n",
  26. "max_edge_num : 10\n",
  27. "ave_node_degree : 2.80327868852459\n",
  28. "min_node_degree : 2\n",
  29. "max_node_degree : 4\n",
  30. "node_label_num : 3\n",
  31. "edge_label_num : 1\n",
  32. "node_attr_dim : 0\n",
  33. "edge_attr_dim : 0\n",
  34. "class_number : 148\n",
  35. "\n",
  36. "\n",
  37. "Alkane:\n",
  38. "substructures : {'non linear', 'linear'}\n",
  39. "node_labeled : True\n",
  40. "edge_labeled : False\n",
  41. "is_directed : False\n",
  42. "dataset_size : 150\n",
  43. "ave_node_num : 8.873333333333333\n",
  44. "min_node_num : 1\n",
  45. "max_node_num : 10\n",
  46. "ave_edge_num : 7.873333333333333\n",
  47. "min_edge_num : 0\n",
  48. "max_edge_num : 9\n",
  49. "ave_node_degree : 3.36\n",
  50. "min_node_degree : 0\n",
  51. "max_node_degree : 4\n",
  52. "node_label_num : 2\n",
  53. "edge_label_num : 1\n",
  54. "node_attr_dim : 0\n",
  55. "edge_attr_dim : 0\n",
  56. "class_number : 123\n",
  57. "\n",
  58. "\n",
  59. "MAO:\n",
  60. "substructures : {'non linear', 'linear'}\n",
  61. "node_labeled : True\n",
  62. "edge_labeled : True\n",
  63. "is_directed : False\n",
  64. "dataset_size : 68\n",
  65. "ave_node_num : 18.38235294117647\n",
  66. "min_node_num : 11\n",
  67. "max_node_num : 27\n",
  68. "ave_edge_num : 19.63235294117647\n",
  69. "min_edge_num : 12\n",
  70. "max_edge_num : 29\n",
  71. "ave_node_degree : 3.0\n",
  72. "min_node_degree : 3\n",
  73. "max_node_degree : 3\n",
  74. "node_label_num : 3\n",
  75. "edge_label_num : 4\n",
  76. "node_attr_dim : 0\n",
  77. "edge_attr_dim : 0\n",
  78. "class_number : 2\n",
  79. "\n",
  80. "\n",
  81. "PAH:\n",
  82. "substructures : {'non linear', 'linear'}\n",
  83. "node_labeled : False\n",
  84. "edge_labeled : False\n",
  85. "is_directed : False\n",
  86. "dataset_size : 94\n",
  87. "ave_node_num : 20.70212765957447\n",
  88. "min_node_num : 10\n",
  89. "max_node_num : 28\n",
  90. "ave_edge_num : 24.425531914893618\n",
  91. "min_edge_num : 11\n",
  92. "max_edge_num : 34\n",
  93. "ave_node_degree : 3.0106382978723403\n",
  94. "min_node_degree : 3\n",
  95. "max_node_degree : 4\n",
  96. "node_label_num : 1\n",
  97. "edge_label_num : 1\n",
  98. "node_attr_dim : 0\n",
  99. "edge_attr_dim : 0\n",
  100. "class_number : 2\n",
  101. "\n",
  102. "\n",
  103. "MUTAG:\n",
  104. "substructures : {'non linear', 'linear'}\n",
  105. "node_labeled : True\n",
  106. "edge_labeled : True\n",
  107. "is_directed : False\n",
  108. "dataset_size : 188\n",
  109. "ave_node_num : 17.930851063829788\n",
  110. "min_node_num : 10\n",
  111. "max_node_num : 28\n",
  112. "ave_edge_num : 19.79255319148936\n",
  113. "min_edge_num : 10\n",
  114. "max_edge_num : 33\n",
  115. "ave_node_degree : 3.00531914893617\n",
  116. "min_node_degree : 3\n",
  117. "max_node_degree : 4\n",
  118. "node_label_num : 7\n",
  119. "edge_label_num : 11\n",
  120. "node_attr_dim : 0\n",
  121. "edge_attr_dim : 0\n",
  122. "class_number : 2\n",
  123. "\n",
  124. "\n",
  125. "Letter-med:\n",
  126. "substructures : {'non linear', 'linear'}\n",
  127. "node_labeled : False\n",
  128. "edge_labeled : False\n",
  129. "is_directed : False\n",
  130. "dataset_size : 2250\n",
  131. "ave_node_num : 4.674666666666667\n",
  132. "min_node_num : 1\n",
  133. "max_node_num : 9\n",
  134. "ave_edge_num : 3.2057777777777776\n",
  135. "min_edge_num : 0\n",
  136. "max_edge_num : 7\n",
  137. "ave_node_degree : 2.012888888888889\n",
  138. "min_node_degree : 0\n",
  139. "max_node_degree : 4\n",
  140. "node_label_num : 0\n",
  141. "edge_label_num : 0\n",
  142. "node_attr_dim : 2\n",
  143. "edge_attr_dim : 0\n",
  144. "class_number : 15\n",
  145. "\n",
  146. "\n",
  147. "ENZYMES:\n",
  148. "substructures : {'non linear', 'linear'}\n",
  149. "node_labeled : True\n",
  150. "edge_labeled : False\n",
  151. "is_directed : False\n",
  152. "dataset_size : 600\n",
  153. "ave_node_num : 32.63333333333333\n",
  154. "min_node_num : 2\n",
  155. "max_node_num : 126\n",
  156. "ave_edge_num : 62.13666666666666\n",
  157. "min_edge_num : 1\n",
  158. "max_edge_num : 149\n",
  159. "ave_node_degree : 6.086666666666667\n",
  160. "min_node_degree : 1\n",
  161. "max_node_degree : 9\n",
  162. "node_label_num : 3\n",
  163. "edge_label_num : 0\n",
  164. "node_attr_dim : 18\n",
  165. "edge_attr_dim : 0\n",
  166. "class_number : 6\n",
  167. "\n",
  168. "\n",
  169. "Mutagenicity:\n",
  170. "substructures : {'non linear', 'linear'}\n",
  171. "node_labeled : True\n",
  172. "edge_labeled : True\n",
  173. "is_directed : False\n",
  174. "dataset_size : 4337\n",
  175. "ave_node_num : 30.317731150564907\n",
  176. "min_node_num : 4\n",
  177. "max_node_num : 417\n",
  178. "ave_edge_num : 30.76942587041734\n",
  179. "min_edge_num : 3\n",
  180. "max_edge_num : 112\n",
  181. "ave_node_degree : 3.75651371916071\n",
  182. "min_node_degree : 3\n",
  183. "max_node_degree : 4\n",
  184. "node_label_num : 14\n",
  185. "edge_label_num : 3\n",
  186. "node_attr_dim : 0\n",
  187. "edge_attr_dim : 0\n",
  188. "class_number : 2\n",
  189. "\n",
  190. "\n",
  191. "D&D:\n",
  192. "substructures : {'non linear', 'linear'}\n",
  193. "node_labeled : True\n",
  194. "edge_labeled : False\n",
  195. "is_directed : False\n",
  196. "dataset_size : 1178\n",
  197. "ave_node_num : 284.3166383701188\n",
  198. "min_node_num : 30\n",
  199. "max_node_num : 5748\n",
  200. "ave_edge_num : 715.6587436332767\n",
  201. "min_edge_num : 63\n",
  202. "max_edge_num : 14267\n",
  203. "ave_node_degree : 9.509337860780985\n",
  204. "min_node_degree : 6\n",
  205. "max_node_degree : 19\n",
  206. "node_label_num : 82\n",
  207. "edge_label_num : 0\n",
  208. "node_attr_dim : 0\n",
  209. "edge_attr_dim : 0\n",
  210. "class_number : 2\n",
  211. "\n",
  212. "\n",
  213. "AIDS:\n",
  214. "substructures : {'non linear', 'linear'}\n",
  215. "node_labeled : True\n",
  216. "edge_labeled : True\n",
  217. "is_directed : False\n",
  218. "dataset_size : 2000\n",
  219. "ave_node_num : 15.6925\n",
  220. "min_node_num : 2\n",
  221. "max_node_num : 95\n",
  222. "ave_edge_num : 16.195\n",
  223. "min_edge_num : 1\n",
  224. "max_edge_num : 103\n",
  225. "ave_node_degree : 3.322\n",
  226. "min_node_degree : 1\n",
  227. "max_node_degree : 6\n",
  228. "node_label_num : 38\n",
  229. "edge_label_num : 3\n",
  230. "node_attr_dim : 4\n",
  231. "edge_attr_dim : 0\n",
  232. "class_number : 2\n",
  233. "\n",
  234. "\n",
  235. "FIRSTMM_DB:\n",
  236. "substructures : {'non linear'}\n",
  237. "node_labeled : True\n",
  238. "edge_labeled : False\n",
  239. "is_directed : False\n",
  240. "dataset_size : 41\n",
  241. "ave_node_num : 1377.2682926829268\n",
  242. "min_node_num : 134\n",
  243. "max_node_num : 5037\n",
  244. "ave_edge_num : 3074.0975609756097\n",
  245. "min_edge_num : 320\n",
  246. "max_edge_num : 10888\n",
  247. "ave_node_degree : 7.853658536585366\n",
  248. "min_node_degree : 6\n",
  249. "max_node_degree : 10\n",
  250. "node_label_num : 5\n",
  251. "edge_label_num : 0\n",
  252. "node_attr_dim : 1\n",
  253. "edge_attr_dim : 2\n",
  254. "class_number : 11\n",
  255. "\n",
  256. "\n",
  257. "MSRC9:\n",
  258. "substructures : {'non linear', 'linear'}\n",
  259. "node_labeled : True\n",
  260. "edge_labeled : False\n",
  261. "is_directed : False\n",
  262. "dataset_size : 221\n",
  263. "ave_node_num : 40.57918552036199\n",
  264. "min_node_num : 25\n",
  265. "max_node_num : 55\n",
  266. "ave_edge_num : 97.9366515837104\n",
  267. "min_edge_num : 53\n",
  268. "max_edge_num : 145\n",
  269. "ave_node_degree : 10.158371040723981\n",
  270. "min_node_degree : 8\n",
  271. "max_node_degree : 16\n",
  272. "node_label_num : 10\n",
  273. "edge_label_num : 0\n",
  274. "node_attr_dim : 0\n",
  275. "edge_attr_dim : 0\n",
  276. "class_number : 8\n",
  277. "\n",
  278. "\n",
  279. "MSRC21:\n",
  280. "substructures : {'non linear', 'linear'}\n",
  281. "node_labeled : True\n",
  282. "edge_labeled : False\n",
  283. "is_directed : False\n",
  284. "dataset_size : 563\n",
  285. "ave_node_num : 77.52042628774423\n",
  286. "min_node_num : 51\n",
  287. "max_node_num : 141\n",
  288. "ave_edge_num : 198.32326820603907\n",
  289. "min_edge_num : 121\n",
  290. "max_edge_num : 405\n",
  291. "ave_node_degree : 11.41563055062167\n",
  292. "min_node_degree : 8\n",
  293. "max_node_degree : 23\n",
  294. "node_label_num : 22\n",
  295. "edge_label_num : 0\n",
  296. "node_attr_dim : 0\n",
  297. "edge_attr_dim : 0\n",
  298. "class_number : 20\n",
  299. "\n",
  300. "\n",
  301. "SYNTHETIC:\n",
  302. "substructures : {'non linear', 'linear'}\n",
  303. "node_labeled : True\n",
  304. "edge_labeled : False\n",
  305. "is_directed : False\n",
  306. "dataset_size : 300\n",
  307. "ave_node_num : 100.0\n",
  308. "min_node_num : 100\n",
  309. "max_node_num : 100\n",
  310. "ave_edge_num : 196.0\n",
  311. "min_edge_num : 196\n",
  312. "max_edge_num : 196\n",
  313. "ave_node_degree : 8.0\n",
  314. "min_node_degree : 8\n",
  315. "max_node_degree : 8\n",
  316. "node_label_num : 8\n",
  317. "edge_label_num : 0\n",
  318. "node_attr_dim : 1\n",
  319. "edge_attr_dim : 0\n",
  320. "class_number : 2\n",
  321. "\n",
  322. "\n",
  323. "BZR:\n",
  324. "substructures : {'non linear', 'linear'}\n",
  325. "node_labeled : True\n",
  326. "edge_labeled : False\n",
  327. "is_directed : False\n",
  328. "dataset_size : 405\n",
  329. "ave_node_num : 35.75061728395062\n",
  330. "min_node_num : 13\n",
  331. "max_node_num : 57\n",
  332. "ave_edge_num : 38.358024691358025\n",
  333. "min_edge_num : 13\n",
  334. "max_edge_num : 60\n",
  335. "ave_node_degree : 3.8641975308641974\n",
  336. "min_node_degree : 3\n",
  337. "max_node_degree : 4\n",
  338. "node_label_num : 10\n",
  339. "edge_label_num : 0\n",
  340. "node_attr_dim : 3\n",
  341. "edge_attr_dim : 0\n",
  342. "class_number : 2\n",
  343. "\n",
  344. "\n",
  345. "COX2:\n",
  346. "substructures : {'non linear', 'linear'}\n",
  347. "node_labeled : True\n",
  348. "edge_labeled : False\n",
  349. "is_directed : False\n",
  350. "dataset_size : 467\n",
  351. "ave_node_num : 41.224839400428266\n",
  352. "min_node_num : 32\n",
  353. "max_node_num : 56\n",
  354. "ave_edge_num : 43.44539614561028\n",
  355. "min_edge_num : 34\n",
  356. "max_edge_num : 59\n",
  357. "ave_node_degree : 4.0\n",
  358. "min_node_degree : 4\n",
  359. "max_node_degree : 4\n",
  360. "node_label_num : 8\n",
  361. "edge_label_num : 0\n",
  362. "node_attr_dim : 3\n",
  363. "edge_attr_dim : 0\n",
  364. "class_number : 2\n",
  365. "\n",
  366. "\n",
  367. "DHFR:\n",
  368. "substructures : {'non linear', 'linear'}\n",
  369. "node_labeled : True\n",
  370. "edge_labeled : False\n",
  371. "is_directed : False\n",
  372. "dataset_size : 756\n",
  373. "ave_node_num : 42.42724867724868\n",
  374. "min_node_num : 20\n",
  375. "max_node_num : 71\n",
  376. "ave_edge_num : 44.544973544973544\n",
  377. "min_edge_num : 21\n",
  378. "max_edge_num : 73\n",
  379. "ave_node_degree : 3.955026455026455\n",
  380. "min_node_degree : 3\n",
  381. "max_node_degree : 4\n",
  382. "node_label_num : 9\n",
  383. "edge_label_num : 0\n",
  384. "node_attr_dim : 3\n",
  385. "edge_attr_dim : 0\n",
  386. "class_number : 2\n",
  387. "\n",
  388. "\n",
  389. "PROTEINS:\n",
  390. "substructures : {'non linear', 'linear'}\n",
  391. "node_labeled : True\n",
  392. "edge_labeled : False\n",
  393. "is_directed : False\n",
  394. "dataset_size : 1113\n",
  395. "ave_node_num : 39.05750224618149\n",
  396. "min_node_num : 4\n",
  397. "max_node_num : 620\n",
  398. "ave_edge_num : 72.8158131176999\n",
  399. "min_edge_num : 5\n",
  400. "max_edge_num : 1049\n",
  401. "ave_node_degree : 5.794249775381851\n",
  402. "min_node_degree : 3\n",
  403. "max_node_degree : 25\n",
  404. "node_label_num : 3\n",
  405. "edge_label_num : 0\n",
  406. "node_attr_dim : 1\n",
  407. "edge_attr_dim : 0\n",
  408. "class_number : 2\n",
  409. "\n",
  410. "\n",
  411. "PROTEINS_full:\n",
  412. "substructures : {'non linear', 'linear'}\n",
  413. "node_labeled : True\n",
  414. "edge_labeled : False\n",
  415. "is_directed : False\n",
  416. "dataset_size : 1113\n",
  417. "ave_node_num : 39.05750224618149\n",
  418. "min_node_num : 4\n",
  419. "max_node_num : 620\n",
  420. "ave_edge_num : 72.8158131176999\n",
  421. "min_edge_num : 5\n",
  422. "max_edge_num : 1049\n",
  423. "ave_node_degree : 5.794249775381851\n",
  424. "min_node_degree : 3\n",
  425. "max_node_degree : 25\n",
  426. "node_label_num : 3\n",
  427. "edge_label_num : 0\n",
  428. "node_attr_dim : 29\n",
  429. "edge_attr_dim : 0\n",
  430. "class_number : 2\n",
  431. "\n"
  432. ]
  433. },
  434. {
  435. "name": "stdout",
  436. "output_type": "stream",
  437. "text": [
  438. "\n",
  439. "NCI1:\n",
  440. "substructures : {'non linear', 'linear'}\n",
  441. "node_labeled : True\n",
  442. "edge_labeled : False\n",
  443. "is_directed : False\n",
  444. "dataset_size : 4110\n",
  445. "ave_node_num : 29.8654501216545\n",
  446. "min_node_num : 3\n",
  447. "max_node_num : 111\n",
  448. "ave_edge_num : 32.3\n",
  449. "min_edge_num : 2\n",
  450. "max_edge_num : 119\n",
  451. "ave_node_degree : 3.3360097323600972\n",
  452. "min_node_degree : 2\n",
  453. "max_node_degree : 4\n",
  454. "node_label_num : 37\n",
  455. "edge_label_num : 0\n",
  456. "node_attr_dim : 0\n",
  457. "edge_attr_dim : 0\n",
  458. "class_number : 2\n",
  459. "\n",
  460. "\n",
  461. "NCI109:\n",
  462. "substructures : {'non linear', 'linear'}\n",
  463. "node_labeled : True\n",
  464. "edge_labeled : False\n",
  465. "is_directed : False\n",
  466. "dataset_size : 4127\n",
  467. "ave_node_num : 29.681124303368065\n",
  468. "min_node_num : 4\n",
  469. "max_node_num : 111\n",
  470. "ave_edge_num : 32.13084565059365\n",
  471. "min_edge_num : 3\n",
  472. "max_edge_num : 119\n",
  473. "ave_node_degree : 3.343833292948873\n",
  474. "min_node_degree : 2\n",
  475. "max_node_degree : 5\n",
  476. "node_label_num : 38\n",
  477. "edge_label_num : 0\n",
  478. "node_attr_dim : 0\n",
  479. "edge_attr_dim : 0\n",
  480. "class_number : 2\n",
  481. "\n",
  482. "load SDF: 100%|██████████| 4457424/4457424 [00:10<00:00, 430440.94it/s]\n",
  483. "ajust data: 100%|██████████| 42687/42687 [00:09<00:00, 4352.25it/s] \n",
  484. "\n",
  485. "NCI-HIV:\n",
  486. "substructures : {'non linear', 'linear'}\n",
  487. "node_labeled : True\n",
  488. "edge_labeled : True\n",
  489. "is_directed : False\n",
  490. "dataset_size : 42682\n",
  491. "ave_node_num : 45.70945597675835\n",
  492. "min_node_num : 2\n",
  493. "max_node_num : 438\n",
  494. "ave_edge_num : 47.7137903565906\n",
  495. "min_edge_num : 1\n",
  496. "max_edge_num : 441\n",
  497. "ave_node_degree : 3.9760554800618526\n",
  498. "min_node_degree : 1\n",
  499. "max_node_degree : 12\n",
  500. "node_label_num : 63\n",
  501. "edge_label_num : 3\n",
  502. "node_attr_dim : 0\n",
  503. "edge_attr_dim : 0\n",
  504. "class_number : 3\n",
  505. "\n"
  506. ]
  507. }
  508. ],
  509. "source": [
  510. "import sys\n",
  511. "sys.path.insert(0, \"../\")\n",
  512. "from pygraph.utils.graphfiles import loadDataset\n",
  513. "from pygraph.utils.graphdataset import get_dataset_attributes\n",
  514. "\n",
  515. "dslist = [\n",
  516. " {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',},\n",
  517. " {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds',\n",
  518. " 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt',},\n",
  519. " {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds',},\n",
  520. " {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds',},\n",
  521. " {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n",
  522. " 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},\n",
  523. " {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n",
  524. " {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n",
  525. " {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n",
  526. " {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n",
  527. " 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},\n",
  528. " {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},\n",
  529. " {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},\n",
  530. " {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},\n",
  531. " {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},\n",
  532. " {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},\n",
  533. " {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},\n",
  534. " {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},\n",
  535. " {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, \n",
  536. " {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},\n",
  537. " {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, \n",
  538. " {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n",
  539. " 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},\n",
  540. " {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n",
  541. " 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},\n",
  542. " {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n",
  543. " 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',},\n",
  544. "\n",
  545. "# # not working below\n",
  546. "# {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},\n",
  547. "# {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},\n",
  548. "# {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},\n",
  549. "# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},\n",
  550. "]\n",
  551. "\n",
  552. "for ds in dslist:\n",
  553. " dataset, y = loadDataset(\n",
  554. " ds['dataset'],\n",
  555. " filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),\n",
  556. " extra_params=(ds['extra_params'] if 'extra_params' in ds else None))\n",
  557. " attrs = get_dataset_attributes(\n",
  558. " dataset, target=y, node_label='atom', edge_label='bond_type')\n",
  559. " print()\n",
  560. " print(ds['name'] + ':')\n",
  561. " for atr in attrs:\n",
  562. " print(atr, ':', attrs[atr])\n",
  563. " print()"
  564. ]
  565. }
  566. ],
  567. "metadata": {
  568. "kernelspec": {
  569. "display_name": "Python 3",
  570. "language": "python",
  571. "name": "python3"
  572. },
  573. "language_info": {
  574. "codemirror_mode": {
  575. "name": "ipython",
  576. "version": 3
  577. },
  578. "file_extension": ".py",
  579. "mimetype": "text/x-python",
  580. "name": "python",
  581. "nbconvert_exporter": "python",
  582. "pygments_lexer": "ipython3",
  583. "version": "3.6.5"
  584. }
  585. },
  586. "nbformat": 4,
  587. "nbformat_minor": 2
  588. }

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.