You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

tutorial_7_metrics.ipynb 34 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "# 使用Metric快速评测你的模型\n",
  8. "\n",
  9. "和上一篇教程一样的实验准备代码"
  10. ]
  11. },
  12. {
  13. "cell_type": "code",
  14. "execution_count": 2,
  15. "metadata": {},
  16. "outputs": [],
  17. "source": [
  18. "from fastNLP.io import SST2Pipe\n",
  19. "from fastNLP import Trainer, CrossEntropyLoss, AccuracyMetric\n",
  20. "from fastNLP.models import CNNText\n",
  21. "import torch\n",
  22. "\n",
  23. "databundle = SST2Pipe().process_from_file()\n",
  24. "vocab = databundle.get_vocab('words')\n",
  25. "train_data = databundle.get_dataset('train')[:5000]\n",
  26. "train_data, test_data = train_data.split(0.015)\n",
  27. "dev_data = databundle.get_dataset('dev')\n",
  28. "\n",
  29. "model = CNNText((len(vocab),100), num_classes=2, dropout=0.1)\n",
  30. "loss = CrossEntropyLoss()\n",
  31. "metric = AccuracyMetric()\n",
  32. "device = 0 if torch.cuda.is_available() else 'cpu'"
  33. ]
  34. },
  35. {
  36. "cell_type": "markdown",
  37. "metadata": {},
  38. "source": [
  39. "进行训练时,fastNLP提供了各种各样的 metrics 。 如前面的教程中所介绍,AccuracyMetric 类的对象被直接传到 Trainer 中,用于在训练过程中评测模型"
  40. ]
  41. },
  42. {
  43. "cell_type": "code",
  44. "execution_count": 3,
  45. "metadata": {
  46. "scrolled": true
  47. },
  48. "outputs": [
  49. {
  50. "name": "stdout",
  51. "output_type": "stream",
  52. "text": [
  53. "input fields after batch(if batch size is 2):\n",
  54. "\twords: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 4]) \n",
  55. "\tseq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
  56. "target fields after batch(if batch size is 2):\n",
  57. "\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
  58. "\n",
  59. "training epochs started 2020-02-28-00-37-08\n"
  60. ]
  61. },
  62. {
  63. "data": {
  64. "application/vnd.jupyter.widget-view+json": {
  65. "model_id": "",
  66. "version_major": 2,
  67. "version_minor": 0
  68. },
  69. "text/plain": [
  70. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=1540.0), HTML(value='')), layout=Layout(d…"
  71. ]
  72. },
  73. "metadata": {},
  74. "output_type": "display_data"
  75. },
  76. {
  77. "data": {
  78. "application/vnd.jupyter.widget-view+json": {
  79. "model_id": "",
  80. "version_major": 2,
  81. "version_minor": 0
  82. },
  83. "text/plain": [
  84. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  85. ]
  86. },
  87. "metadata": {},
  88. "output_type": "display_data"
  89. },
  90. {
  91. "name": "stdout",
  92. "output_type": "stream",
  93. "text": [
  94. "\r",
  95. "Evaluate data in 0.28 seconds!\n",
  96. "\r",
  97. "Evaluation on dev at Epoch 1/10. Step:154/1540: \n",
  98. "\r",
  99. "AccuracyMetric: acc=0.747706\n",
  100. "\n"
  101. ]
  102. },
  103. {
  104. "data": {
  105. "application/vnd.jupyter.widget-view+json": {
  106. "model_id": "",
  107. "version_major": 2,
  108. "version_minor": 0
  109. },
  110. "text/plain": [
  111. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  112. ]
  113. },
  114. "metadata": {},
  115. "output_type": "display_data"
  116. },
  117. {
  118. "name": "stdout",
  119. "output_type": "stream",
  120. "text": [
  121. "\r",
  122. "Evaluate data in 0.17 seconds!\n",
  123. "\r",
  124. "Evaluation on dev at Epoch 2/10. Step:308/1540: \n",
  125. "\r",
  126. "AccuracyMetric: acc=0.745413\n",
  127. "\n"
  128. ]
  129. },
  130. {
  131. "data": {
  132. "application/vnd.jupyter.widget-view+json": {
  133. "model_id": "",
  134. "version_major": 2,
  135. "version_minor": 0
  136. },
  137. "text/plain": [
  138. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  139. ]
  140. },
  141. "metadata": {},
  142. "output_type": "display_data"
  143. },
  144. {
  145. "name": "stdout",
  146. "output_type": "stream",
  147. "text": [
  148. "\r",
  149. "Evaluate data in 0.19 seconds!\n",
  150. "\r",
  151. "Evaluation on dev at Epoch 3/10. Step:462/1540: \n",
  152. "\r",
  153. "AccuracyMetric: acc=0.74656\n",
  154. "\n"
  155. ]
  156. },
  157. {
  158. "data": {
  159. "application/vnd.jupyter.widget-view+json": {
  160. "model_id": "",
  161. "version_major": 2,
  162. "version_minor": 0
  163. },
  164. "text/plain": [
  165. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  166. ]
  167. },
  168. "metadata": {},
  169. "output_type": "display_data"
  170. },
  171. {
  172. "name": "stdout",
  173. "output_type": "stream",
  174. "text": [
  175. "\r",
  176. "Evaluate data in 0.15 seconds!\n",
  177. "\r",
  178. "Evaluation on dev at Epoch 4/10. Step:616/1540: \n",
  179. "\r",
  180. "AccuracyMetric: acc=0.762615\n",
  181. "\n"
  182. ]
  183. },
  184. {
  185. "data": {
  186. "application/vnd.jupyter.widget-view+json": {
  187. "model_id": "",
  188. "version_major": 2,
  189. "version_minor": 0
  190. },
  191. "text/plain": [
  192. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  193. ]
  194. },
  195. "metadata": {},
  196. "output_type": "display_data"
  197. },
  198. {
  199. "name": "stdout",
  200. "output_type": "stream",
  201. "text": [
  202. "\r",
  203. "Evaluate data in 0.42 seconds!\n",
  204. "\r",
  205. "Evaluation on dev at Epoch 5/10. Step:770/1540: \n",
  206. "\r",
  207. "AccuracyMetric: acc=0.736239\n",
  208. "\n"
  209. ]
  210. },
  211. {
  212. "data": {
  213. "application/vnd.jupyter.widget-view+json": {
  214. "model_id": "",
  215. "version_major": 2,
  216. "version_minor": 0
  217. },
  218. "text/plain": [
  219. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  220. ]
  221. },
  222. "metadata": {},
  223. "output_type": "display_data"
  224. },
  225. {
  226. "name": "stdout",
  227. "output_type": "stream",
  228. "text": [
  229. "\r",
  230. "Evaluate data in 0.16 seconds!\n",
  231. "\r",
  232. "Evaluation on dev at Epoch 6/10. Step:924/1540: \n",
  233. "\r",
  234. "AccuracyMetric: acc=0.761468\n",
  235. "\n"
  236. ]
  237. },
  238. {
  239. "data": {
  240. "application/vnd.jupyter.widget-view+json": {
  241. "model_id": "",
  242. "version_major": 2,
  243. "version_minor": 0
  244. },
  245. "text/plain": [
  246. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  247. ]
  248. },
  249. "metadata": {},
  250. "output_type": "display_data"
  251. },
  252. {
  253. "name": "stdout",
  254. "output_type": "stream",
  255. "text": [
  256. "\r",
  257. "Evaluate data in 0.42 seconds!\n",
  258. "\r",
  259. "Evaluation on dev at Epoch 7/10. Step:1078/1540: \n",
  260. "\r",
  261. "AccuracyMetric: acc=0.727064\n",
  262. "\n"
  263. ]
  264. },
  265. {
  266. "data": {
  267. "application/vnd.jupyter.widget-view+json": {
  268. "model_id": "",
  269. "version_major": 2,
  270. "version_minor": 0
  271. },
  272. "text/plain": [
  273. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  274. ]
  275. },
  276. "metadata": {},
  277. "output_type": "display_data"
  278. },
  279. {
  280. "name": "stdout",
  281. "output_type": "stream",
  282. "text": [
  283. "\r",
  284. "Evaluate data in 0.21 seconds!\n",
  285. "\r",
  286. "Evaluation on dev at Epoch 8/10. Step:1232/1540: \n",
  287. "\r",
  288. "AccuracyMetric: acc=0.731651\n",
  289. "\n"
  290. ]
  291. },
  292. {
  293. "data": {
  294. "application/vnd.jupyter.widget-view+json": {
  295. "model_id": "",
  296. "version_major": 2,
  297. "version_minor": 0
  298. },
  299. "text/plain": [
  300. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  301. ]
  302. },
  303. "metadata": {},
  304. "output_type": "display_data"
  305. },
  306. {
  307. "name": "stdout",
  308. "output_type": "stream",
  309. "text": [
  310. "\r",
  311. "Evaluate data in 0.52 seconds!\n",
  312. "\r",
  313. "Evaluation on dev at Epoch 9/10. Step:1386/1540: \n",
  314. "\r",
  315. "AccuracyMetric: acc=0.752294\n",
  316. "\n"
  317. ]
  318. },
  319. {
  320. "data": {
  321. "application/vnd.jupyter.widget-view+json": {
  322. "model_id": "",
  323. "version_major": 2,
  324. "version_minor": 0
  325. },
  326. "text/plain": [
  327. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  328. ]
  329. },
  330. "metadata": {},
  331. "output_type": "display_data"
  332. },
  333. {
  334. "name": "stdout",
  335. "output_type": "stream",
  336. "text": [
  337. "\r",
  338. "Evaluate data in 0.44 seconds!\n",
  339. "\r",
  340. "Evaluation on dev at Epoch 10/10. Step:1540/1540: \n",
  341. "\r",
  342. "AccuracyMetric: acc=0.760321\n",
  343. "\n",
  344. "\r\n",
  345. "In Epoch:4/Step:616, got best dev performance:\n",
  346. "AccuracyMetric: acc=0.762615\n",
  347. "Reloaded the best model.\n"
  348. ]
  349. },
  350. {
  351. "data": {
  352. "text/plain": [
  353. "{'best_eval': {'AccuracyMetric': {'acc': 0.762615}},\n",
  354. " 'best_epoch': 4,\n",
  355. " 'best_step': 616,\n",
  356. " 'seconds': 32.63}"
  357. ]
  358. },
  359. "execution_count": 3,
  360. "metadata": {},
  361. "output_type": "execute_result"
  362. }
  363. ],
  364. "source": [
  365. "trainer = Trainer(train_data=train_data, dev_data=dev_data, model=model,\n",
  366. " loss=loss, device=device, metrics=metric)\n",
  367. "trainer.train()"
  368. ]
  369. },
  370. {
  371. "cell_type": "markdown",
  372. "metadata": {},
  373. "source": [
  374. "除了 AccuracyMetric 之外,SpanFPreRecMetric 也是一种非常常见的评价指标, 例如在序列标注问题中,常以span的方式计算 F-measure, precision, recall。\n",
  375. "\n",
  376. "另外,fastNLP 还实现了用于抽取式QA(如SQuAD)的metric ExtractiveQAMetric。 用户可以参考下面这个表格。\n",
  377. "\n",
  378. "| 名称 | 介绍 |\n",
  379. "| -------------------- | ------------------------------------------------- |\n",
  380. "| `MetricBase` | 自定义metrics需继承的基类 |\n",
  381. "| `AccuracyMetric` | 简单的正确率metric |\n",
  382. "| `SpanFPreRecMetric` | 同时计算 F-measure, precision, recall 值的 metric |\n",
  383. "| `ExtractiveQAMetric` | 用于抽取式QA任务 的metric |\n",
  384. "\n"
  385. ]
  386. },
  387. {
  388. "cell_type": "markdown",
  389. "metadata": {},
  390. "source": [
  391. "## 定义自己的metrics\n",
  392. "\n",
  393. "在定义自己的metrics类时需继承 fastNLP 的 MetricBase, 并重写 evaluate 和 get_metric 方法。\n",
  394. "\n",
  395. "- evaluate(xxx) 中传入一个批次的数据,将针对一个批次的预测结果做评价指标的累计\n",
  396. "\n",
  397. "- get_metric(xxx) 当所有数据处理完毕时调用该方法,它将根据 evaluate函数累计的评价指标统计量来计算最终的评价结果\n",
  398. "\n",
  399. "以分类问题中,Accuracy计算为例,假设model的forward返回dict中包含 pred 这个key, 并且该key需要用于Accuracy:\n",
  400. "\n",
  401. "```python\n",
  402. "class Model(nn.Module):\n",
  403. " def __init__(xxx):\n",
  404. " # do something\n",
  405. " def forward(self, xxx):\n",
  406. " # do something\n",
  407. " return {'pred': pred, 'other_keys':xxx} # pred's shape: batch_size x num_classes\n",
  408. "```"
  409. ]
  410. },
  411. {
  412. "cell_type": "markdown",
  413. "metadata": {},
  414. "source": [
  415. "### Version 1\n",
  416. "\n",
  417. "假设dataset中 `target` 这个 field 是需要预测的值,并且该 field 被设置为了 target,则对应的 `AccMetric` 可以按如下方式定义"
  418. ]
  419. },
  420. {
  421. "cell_type": "code",
  422. "execution_count": 4,
  423. "metadata": {},
  424. "outputs": [],
  425. "source": [
  426. "from fastNLP import MetricBase\n",
  427. "\n",
  428. "class AccMetric(MetricBase):\n",
  429. "\n",
  430. " def __init__(self):\n",
  431. " super().__init__()\n",
  432. " # 根据你的情况自定义指标\n",
  433. " self.total = 0\n",
  434. " self.acc_count = 0\n",
  435. "\n",
  436. " # evaluate的参数需要和DataSet 中 field 名以及模型输出的结果 field 名一致,不然找不到对应的value\n",
  437. " # pred, target 的参数是 fastNLP 的默认配置\n",
  438. " def evaluate(self, pred, target):\n",
  439. " # dev或test时,每个batch结束会调用一次该方法,需要实现如何根据每个batch累加metric\n",
  440. " self.total += target.size(0)\n",
  441. " self.acc_count += target.eq(pred).sum().item()\n",
  442. "\n",
  443. " def get_metric(self, reset=True): # 在这里定义如何计算metric\n",
  444. " acc = self.acc_count/self.total\n",
  445. " if reset: # 是否清零以便重新计算\n",
  446. " self.acc_count = 0\n",
  447. " self.total = 0\n",
  448. " return {'acc': acc}\n",
  449. " # 需要返回一个dict,key为该metric的名称,该名称会显示到Trainer的progress bar中"
  450. ]
  451. },
  452. {
  453. "cell_type": "code",
  454. "execution_count": 5,
  455. "metadata": {
  456. "scrolled": true
  457. },
  458. "outputs": [
  459. {
  460. "name": "stdout",
  461. "output_type": "stream",
  462. "text": [
  463. "input fields after batch(if batch size is 2):\n",
  464. "\twords: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 4]) \n",
  465. "\tseq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
  466. "target fields after batch(if batch size is 2):\n",
  467. "\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
  468. "\n",
  469. "training epochs started 2020-02-28-00-37-41\n"
  470. ]
  471. },
  472. {
  473. "data": {
  474. "application/vnd.jupyter.widget-view+json": {
  475. "model_id": "",
  476. "version_major": 2,
  477. "version_minor": 0
  478. },
  479. "text/plain": [
  480. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=1540.0), HTML(value='')), layout=Layout(d…"
  481. ]
  482. },
  483. "metadata": {},
  484. "output_type": "display_data"
  485. },
  486. {
  487. "data": {
  488. "application/vnd.jupyter.widget-view+json": {
  489. "model_id": "",
  490. "version_major": 2,
  491. "version_minor": 0
  492. },
  493. "text/plain": [
  494. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  495. ]
  496. },
  497. "metadata": {},
  498. "output_type": "display_data"
  499. },
  500. {
  501. "name": "stdout",
  502. "output_type": "stream",
  503. "text": [
  504. "\r",
  505. "Evaluate data in 0.27 seconds!\n",
  506. "\r",
  507. "Evaluation on dev at Epoch 1/10. Step:154/1540: \n",
  508. "\r",
  509. "AccMetric: acc=0.7431192660550459\n",
  510. "\n"
  511. ]
  512. },
  513. {
  514. "data": {
  515. "application/vnd.jupyter.widget-view+json": {
  516. "model_id": "",
  517. "version_major": 2,
  518. "version_minor": 0
  519. },
  520. "text/plain": [
  521. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  522. ]
  523. },
  524. "metadata": {},
  525. "output_type": "display_data"
  526. },
  527. {
  528. "name": "stdout",
  529. "output_type": "stream",
  530. "text": [
  531. "\r",
  532. "Evaluate data in 0.42 seconds!\n",
  533. "\r",
  534. "Evaluation on dev at Epoch 2/10. Step:308/1540: \n",
  535. "\r",
  536. "AccMetric: acc=0.7522935779816514\n",
  537. "\n"
  538. ]
  539. },
  540. {
  541. "data": {
  542. "application/vnd.jupyter.widget-view+json": {
  543. "model_id": "",
  544. "version_major": 2,
  545. "version_minor": 0
  546. },
  547. "text/plain": [
  548. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  549. ]
  550. },
  551. "metadata": {},
  552. "output_type": "display_data"
  553. },
  554. {
  555. "name": "stdout",
  556. "output_type": "stream",
  557. "text": [
  558. "\r",
  559. "Evaluate data in 0.51 seconds!\n",
  560. "\r",
  561. "Evaluation on dev at Epoch 3/10. Step:462/1540: \n",
  562. "\r",
  563. "AccMetric: acc=0.7477064220183486\n",
  564. "\n"
  565. ]
  566. },
  567. {
  568. "data": {
  569. "application/vnd.jupyter.widget-view+json": {
  570. "model_id": "",
  571. "version_major": 2,
  572. "version_minor": 0
  573. },
  574. "text/plain": [
  575. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  576. ]
  577. },
  578. "metadata": {},
  579. "output_type": "display_data"
  580. },
  581. {
  582. "name": "stdout",
  583. "output_type": "stream",
  584. "text": [
  585. "\r",
  586. "Evaluate data in 0.48 seconds!\n",
  587. "\r",
  588. "Evaluation on dev at Epoch 4/10. Step:616/1540: \n",
  589. "\r",
  590. "AccMetric: acc=0.7442660550458715\n",
  591. "\n"
  592. ]
  593. },
  594. {
  595. "data": {
  596. "application/vnd.jupyter.widget-view+json": {
  597. "model_id": "",
  598. "version_major": 2,
  599. "version_minor": 0
  600. },
  601. "text/plain": [
  602. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  603. ]
  604. },
  605. "metadata": {},
  606. "output_type": "display_data"
  607. },
  608. {
  609. "name": "stdout",
  610. "output_type": "stream",
  611. "text": [
  612. "\r",
  613. "Evaluate data in 0.5 seconds!\n",
  614. "\r",
  615. "Evaluation on dev at Epoch 5/10. Step:770/1540: \n",
  616. "\r",
  617. "AccMetric: acc=0.7362385321100917\n",
  618. "\n"
  619. ]
  620. },
  621. {
  622. "data": {
  623. "application/vnd.jupyter.widget-view+json": {
  624. "model_id": "",
  625. "version_major": 2,
  626. "version_minor": 0
  627. },
  628. "text/plain": [
  629. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  630. ]
  631. },
  632. "metadata": {},
  633. "output_type": "display_data"
  634. },
  635. {
  636. "name": "stdout",
  637. "output_type": "stream",
  638. "text": [
  639. "\r",
  640. "Evaluate data in 0.45 seconds!\n",
  641. "\r",
  642. "Evaluation on dev at Epoch 6/10. Step:924/1540: \n",
  643. "\r",
  644. "AccMetric: acc=0.7293577981651376\n",
  645. "\n"
  646. ]
  647. },
  648. {
  649. "data": {
  650. "application/vnd.jupyter.widget-view+json": {
  651. "model_id": "",
  652. "version_major": 2,
  653. "version_minor": 0
  654. },
  655. "text/plain": [
  656. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  657. ]
  658. },
  659. "metadata": {},
  660. "output_type": "display_data"
  661. },
  662. {
  663. "name": "stdout",
  664. "output_type": "stream",
  665. "text": [
  666. "\r",
  667. "Evaluate data in 0.33 seconds!\n",
  668. "\r",
  669. "Evaluation on dev at Epoch 7/10. Step:1078/1540: \n",
  670. "\r",
  671. "AccMetric: acc=0.7190366972477065\n",
  672. "\n"
  673. ]
  674. },
  675. {
  676. "data": {
  677. "application/vnd.jupyter.widget-view+json": {
  678. "model_id": "",
  679. "version_major": 2,
  680. "version_minor": 0
  681. },
  682. "text/plain": [
  683. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  684. ]
  685. },
  686. "metadata": {},
  687. "output_type": "display_data"
  688. },
  689. {
  690. "name": "stdout",
  691. "output_type": "stream",
  692. "text": [
  693. "\r",
  694. "Evaluate data in 0.29 seconds!\n",
  695. "\r",
  696. "Evaluation on dev at Epoch 8/10. Step:1232/1540: \n",
  697. "\r",
  698. "AccMetric: acc=0.7419724770642202\n",
  699. "\n"
  700. ]
  701. },
  702. {
  703. "data": {
  704. "application/vnd.jupyter.widget-view+json": {
  705. "model_id": "",
  706. "version_major": 2,
  707. "version_minor": 0
  708. },
  709. "text/plain": [
  710. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  711. ]
  712. },
  713. "metadata": {},
  714. "output_type": "display_data"
  715. },
  716. {
  717. "name": "stdout",
  718. "output_type": "stream",
  719. "text": [
  720. "\r",
  721. "Evaluate data in 0.34 seconds!\n",
  722. "\r",
  723. "Evaluation on dev at Epoch 9/10. Step:1386/1540: \n",
  724. "\r",
  725. "AccMetric: acc=0.7350917431192661\n",
  726. "\n"
  727. ]
  728. },
  729. {
  730. "data": {
  731. "application/vnd.jupyter.widget-view+json": {
  732. "model_id": "",
  733. "version_major": 2,
  734. "version_minor": 0
  735. },
  736. "text/plain": [
  737. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  738. ]
  739. },
  740. "metadata": {},
  741. "output_type": "display_data"
  742. },
  743. {
  744. "name": "stdout",
  745. "output_type": "stream",
  746. "text": [
  747. "\r",
  748. "Evaluate data in 0.18 seconds!\n",
  749. "\r",
  750. "Evaluation on dev at Epoch 10/10. Step:1540/1540: \n",
  751. "\r",
  752. "AccMetric: acc=0.6846330275229358\n",
  753. "\n",
  754. "\r\n",
  755. "In Epoch:2/Step:308, got best dev performance:\n",
  756. "AccMetric: acc=0.7522935779816514\n",
  757. "Reloaded the best model.\n"
  758. ]
  759. },
  760. {
  761. "data": {
  762. "text/plain": [
  763. "{'best_eval': {'AccMetric': {'acc': 0.7522935779816514}},\n",
  764. " 'best_epoch': 2,\n",
  765. " 'best_step': 308,\n",
  766. " 'seconds': 42.7}"
  767. ]
  768. },
  769. "execution_count": 5,
  770. "metadata": {},
  771. "output_type": "execute_result"
  772. }
  773. ],
  774. "source": [
  775. "trainer = Trainer(train_data=train_data, dev_data=dev_data, model=model,\n",
  776. " loss=loss, device=device, metrics=AccMetric())\n",
  777. "trainer.train()"
  778. ]
  779. },
  780. {
  781. "cell_type": "markdown",
  782. "metadata": {},
  783. "source": [
  784. "### Version 2\n",
  785. "\n",
  786. "如果需要复用 metric,比如下一次使用 `AccMetric` 时,dataset中目标field不叫 `target` 而叫 `y` ,或者model的输出不是 `pred`,那么可以在初始化时通过 `_init_param_map` 注册参数名的映射,如下所示\n"
  787. ]
  788. },
  789. {
  790. "cell_type": "code",
  791. "execution_count": 6,
  792. "metadata": {},
  793. "outputs": [],
  794. "source": [
  795. "class AccMetric(MetricBase):\n",
  796. " def __init__(self, pred=None, target=None):\n",
  797. " \"\"\"\n",
  798. " 假设在另一场景使用时,目标field叫y,model给出的key为pred_y。则只需要在初始化AccMetric时,\n",
  799. " acc_metric = AccMetric(pred='pred_y', target='y')即可。\n",
  800. " 当初始化为acc_metric = AccMetric() 时,fastNLP会直接使用 'pred', 'target' 作为key去索取对应的值\n",
  801. " \"\"\"\n",
  802. "\n",
  803. " super().__init__()\n",
  804. "\n",
  805. " # 如果没有注册参数映射,则效果与 Version 1 是一样的\n",
  806. " self._init_param_map(pred=pred, target=target) # 该方法会注册label和pred. 仅需要注册evaluate()方法会用到的参数名即可\n",
  807. "\n",
  808. " # 根据你的情况自定义指标\n",
  809. " self.total = 0\n",
  810. " self.acc_count = 0\n",
  811. "\n",
  812. " # evaluate的参数需要和DataSet 中 field 名以及模型输出的结果 field 名一致,不然找不到对应的value\n",
  813. " # pred, target 的参数是 fastNLP 的默认配置\n",
  814. " def evaluate(self, pred, target):\n",
  815. " # dev或test时,每个batch结束会调用一次该方法,需要实现如何根据每个batch累加metric\n",
  816. " self.total += target.size(0)\n",
  817. " self.acc_count += target.eq(pred).sum().item()\n",
  818. "\n",
  819. " def get_metric(self, reset=True): # 在这里定义如何计算metric\n",
  820. " acc = self.acc_count/self.total\n",
  821. " if reset: # 是否清零以便重新计算\n",
  822. " self.acc_count = 0\n",
  823. " self.total = 0\n",
  824. " return {'acc': acc}\n",
  825. " # 需要返回一个dict,key为该metric的名称,该名称会显示到Trainer的progress bar中"
  826. ]
  827. },
  828. {
  829. "cell_type": "code",
  830. "execution_count": 7,
  831. "metadata": {
  832. "scrolled": true
  833. },
  834. "outputs": [
  835. {
  836. "name": "stdout",
  837. "output_type": "stream",
  838. "text": [
  839. "input fields after batch(if batch size is 2):\n",
  840. "\twords: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 4]) \n",
  841. "\tseq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
  842. "target fields after batch(if batch size is 2):\n",
  843. "\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
  844. "\n",
  845. "training epochs started 2020-02-28-00-38-24\n"
  846. ]
  847. },
  848. {
  849. "data": {
  850. "application/vnd.jupyter.widget-view+json": {
  851. "model_id": "",
  852. "version_major": 2,
  853. "version_minor": 0
  854. },
  855. "text/plain": [
  856. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=1540.0), HTML(value='')), layout=Layout(d…"
  857. ]
  858. },
  859. "metadata": {},
  860. "output_type": "display_data"
  861. },
  862. {
  863. "data": {
  864. "application/vnd.jupyter.widget-view+json": {
  865. "model_id": "",
  866. "version_major": 2,
  867. "version_minor": 0
  868. },
  869. "text/plain": [
  870. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  871. ]
  872. },
  873. "metadata": {},
  874. "output_type": "display_data"
  875. },
  876. {
  877. "name": "stdout",
  878. "output_type": "stream",
  879. "text": [
  880. "\r",
  881. "Evaluate data in 0.32 seconds!\n",
  882. "\r",
  883. "Evaluation on dev at Epoch 1/10. Step:154/1540: \n",
  884. "\r",
  885. "AccMetric: acc=0.7511467889908257\n",
  886. "\n"
  887. ]
  888. },
  889. {
  890. "data": {
  891. "application/vnd.jupyter.widget-view+json": {
  892. "model_id": "",
  893. "version_major": 2,
  894. "version_minor": 0
  895. },
  896. "text/plain": [
  897. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  898. ]
  899. },
  900. "metadata": {},
  901. "output_type": "display_data"
  902. },
  903. {
  904. "name": "stdout",
  905. "output_type": "stream",
  906. "text": [
  907. "\r",
  908. "Evaluate data in 0.29 seconds!\n",
  909. "\r",
  910. "Evaluation on dev at Epoch 2/10. Step:308/1540: \n",
  911. "\r",
  912. "AccMetric: acc=0.7454128440366973\n",
  913. "\n"
  914. ]
  915. },
  916. {
  917. "data": {
  918. "application/vnd.jupyter.widget-view+json": {
  919. "model_id": "",
  920. "version_major": 2,
  921. "version_minor": 0
  922. },
  923. "text/plain": [
  924. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  925. ]
  926. },
  927. "metadata": {},
  928. "output_type": "display_data"
  929. },
  930. {
  931. "name": "stdout",
  932. "output_type": "stream",
  933. "text": [
  934. "\r",
  935. "Evaluate data in 0.42 seconds!\n",
  936. "\r",
  937. "Evaluation on dev at Epoch 3/10. Step:462/1540: \n",
  938. "\r",
  939. "AccMetric: acc=0.7224770642201835\n",
  940. "\n"
  941. ]
  942. },
  943. {
  944. "data": {
  945. "application/vnd.jupyter.widget-view+json": {
  946. "model_id": "",
  947. "version_major": 2,
  948. "version_minor": 0
  949. },
  950. "text/plain": [
  951. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  952. ]
  953. },
  954. "metadata": {},
  955. "output_type": "display_data"
  956. },
  957. {
  958. "name": "stdout",
  959. "output_type": "stream",
  960. "text": [
  961. "\r",
  962. "Evaluate data in 0.4 seconds!\n",
  963. "\r",
  964. "Evaluation on dev at Epoch 4/10. Step:616/1540: \n",
  965. "\r",
  966. "AccMetric: acc=0.7534403669724771\n",
  967. "\n"
  968. ]
  969. },
  970. {
  971. "data": {
  972. "application/vnd.jupyter.widget-view+json": {
  973. "model_id": "",
  974. "version_major": 2,
  975. "version_minor": 0
  976. },
  977. "text/plain": [
  978. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  979. ]
  980. },
  981. "metadata": {},
  982. "output_type": "display_data"
  983. },
  984. {
  985. "name": "stdout",
  986. "output_type": "stream",
  987. "text": [
  988. "\r",
  989. "Evaluate data in 0.41 seconds!\n",
  990. "\r",
  991. "Evaluation on dev at Epoch 5/10. Step:770/1540: \n",
  992. "\r",
  993. "AccMetric: acc=0.7396788990825688\n",
  994. "\n"
  995. ]
  996. },
  997. {
  998. "data": {
  999. "application/vnd.jupyter.widget-view+json": {
  1000. "model_id": "",
  1001. "version_major": 2,
  1002. "version_minor": 0
  1003. },
  1004. "text/plain": [
  1005. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  1006. ]
  1007. },
  1008. "metadata": {},
  1009. "output_type": "display_data"
  1010. },
  1011. {
  1012. "name": "stdout",
  1013. "output_type": "stream",
  1014. "text": [
  1015. "\r",
  1016. "Evaluate data in 0.22 seconds!\n",
  1017. "\r",
  1018. "Evaluation on dev at Epoch 6/10. Step:924/1540: \n",
  1019. "\r",
  1020. "AccMetric: acc=0.7442660550458715\n",
  1021. "\n"
  1022. ]
  1023. },
  1024. {
  1025. "data": {
  1026. "application/vnd.jupyter.widget-view+json": {
  1027. "model_id": "",
  1028. "version_major": 2,
  1029. "version_minor": 0
  1030. },
  1031. "text/plain": [
  1032. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  1033. ]
  1034. },
  1035. "metadata": {},
  1036. "output_type": "display_data"
  1037. },
  1038. {
  1039. "name": "stdout",
  1040. "output_type": "stream",
  1041. "text": [
  1042. "\r",
  1043. "Evaluate data in 0.45 seconds!\n",
  1044. "\r",
  1045. "Evaluation on dev at Epoch 7/10. Step:1078/1540: \n",
  1046. "\r",
  1047. "AccMetric: acc=0.6903669724770642\n",
  1048. "\n"
  1049. ]
  1050. },
  1051. {
  1052. "data": {
  1053. "application/vnd.jupyter.widget-view+json": {
  1054. "model_id": "",
  1055. "version_major": 2,
  1056. "version_minor": 0
  1057. },
  1058. "text/plain": [
  1059. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  1060. ]
  1061. },
  1062. "metadata": {},
  1063. "output_type": "display_data"
  1064. },
  1065. {
  1066. "name": "stdout",
  1067. "output_type": "stream",
  1068. "text": [
  1069. "\r",
  1070. "Evaluate data in 0.25 seconds!\n",
  1071. "\r",
  1072. "Evaluation on dev at Epoch 8/10. Step:1232/1540: \n",
  1073. "\r",
  1074. "AccMetric: acc=0.7293577981651376\n",
  1075. "\n"
  1076. ]
  1077. },
  1078. {
  1079. "data": {
  1080. "application/vnd.jupyter.widget-view+json": {
  1081. "model_id": "",
  1082. "version_major": 2,
  1083. "version_minor": 0
  1084. },
  1085. "text/plain": [
  1086. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  1087. ]
  1088. },
  1089. "metadata": {},
  1090. "output_type": "display_data"
  1091. },
  1092. {
  1093. "name": "stdout",
  1094. "output_type": "stream",
  1095. "text": [
  1096. "\r",
  1097. "Evaluate data in 0.4 seconds!\n",
  1098. "\r",
  1099. "Evaluation on dev at Epoch 9/10. Step:1386/1540: \n",
  1100. "\r",
  1101. "AccMetric: acc=0.7006880733944955\n",
  1102. "\n"
  1103. ]
  1104. },
  1105. {
  1106. "data": {
  1107. "application/vnd.jupyter.widget-view+json": {
  1108. "model_id": "",
  1109. "version_major": 2,
  1110. "version_minor": 0
  1111. },
  1112. "text/plain": [
  1113. "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
  1114. ]
  1115. },
  1116. "metadata": {},
  1117. "output_type": "display_data"
  1118. },
  1119. {
  1120. "name": "stdout",
  1121. "output_type": "stream",
  1122. "text": [
  1123. "\r",
  1124. "Evaluate data in 0.48 seconds!\n",
  1125. "\r",
  1126. "Evaluation on dev at Epoch 10/10. Step:1540/1540: \n",
  1127. "\r",
  1128. "AccMetric: acc=0.7339449541284404\n",
  1129. "\n",
  1130. "\r\n",
  1131. "In Epoch:4/Step:616, got best dev performance:\n",
  1132. "AccMetric: acc=0.7534403669724771\n",
  1133. "Reloaded the best model.\n"
  1134. ]
  1135. },
  1136. {
  1137. "data": {
  1138. "text/plain": [
  1139. "{'best_eval': {'AccMetric': {'acc': 0.7534403669724771}},\n",
  1140. " 'best_epoch': 4,\n",
  1141. " 'best_step': 616,\n",
  1142. " 'seconds': 34.74}"
  1143. ]
  1144. },
  1145. "execution_count": 7,
  1146. "metadata": {},
  1147. "output_type": "execute_result"
  1148. }
  1149. ],
  1150. "source": [
  1151. "trainer = Trainer(train_data=train_data, dev_data=dev_data, model=model,\n",
  1152. " loss=loss, device=device, metrics=AccMetric())\n",
  1153. "trainer.train()"
  1154. ]
  1155. },
  1156. {
  1157. "cell_type": "markdown",
  1158. "metadata": {},
  1159. "source": [
  1160. "``MetricBase`` 将会在输入的字典 ``pred_dict`` 和 ``target_dict`` 中进行检查.\n",
  1161. "``pred_dict`` 是模型当中 ``forward()`` 函数或者 ``predict()`` 函数的返回值.\n",
  1162. "``target_dict`` 是DataSet当中的ground truth, 判定ground truth的条件是field的 ``is_target`` 被设置为True.\n",
  1163. "\n",
  1164. "``MetricBase`` 会进行以下的类型检测:\n",
  1165. "\n",
  1166. "1. ``self.evaluate`` 当中是否有 varargs, 这是不支持的.\n",
  1167. "2. ``self.evaluate`` 当中所需要的参数是否既不在 ``pred_dict`` 也不在 ``target_dict`` (即参数缺失).\n",
  1168. "3. ``self.evaluate`` 当中所需要的参数是否既在 ``pred_dict`` 也在 ``target_dict`` (即参数重复, 无法确定应使用哪一个).\n",
  1169. "\n",
  1170. "除此以外, 在参数被传入 ``self.evaluate`` 以前, ``MetricBase`` 还会检测 ``pred_dict`` 和 ``target_dict`` 当中没有被用到的参数;\n",
  1171. "如果 ``self.evaluate`` 的参数中包含 kwargs, 则不会进行这项检测.\n",
  1172. "\n",
  1173. "``self.evaluate`` 将计算一个批次(batch)的评价指标并累计, 没有返回值.\n",
  1174. "``self.get_metric`` 将统计当前的评价指标并返回评价结果, 返回值需要是一个dict, key是指标名称, value是指标的值.\n"
  1175. ]
  1176. },
  1177. {
  1178. "cell_type": "code",
  1179. "execution_count": null,
  1180. "metadata": {},
  1181. "outputs": [],
  1182. "source": []
  1183. }
  1184. ],
  1185. "metadata": {
  1186. "kernelspec": {
  1187. "display_name": "Python Now",
  1188. "language": "python",
  1189. "name": "now"
  1190. },
  1191. "language_info": {
  1192. "codemirror_mode": {
  1193. "name": "ipython",
  1194. "version": 3
  1195. },
  1196. "file_extension": ".py",
  1197. "mimetype": "text/x-python",
  1198. "name": "python",
  1199. "nbconvert_exporter": "python",
  1200. "pygments_lexer": "ipython3",
  1201. "version": "3.8.0"
  1202. }
  1203. },
  1204. "nbformat": 4,
  1205. "nbformat_minor": 2
  1206. }