You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

fastnlp_tutorial_6.ipynb 10 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "id": "fdd7ff16",
  6. "metadata": {},
  7. "source": [
  8. "# T6. fastNLP 与 paddle 或 jittor 的结合\n",
  9. "\n",
  10. "  1   fastNLP 结合 paddle 训练模型\n",
  11. " \n",
  12. "    1.1   关于 paddle 的简单介绍\n",
  13. "\n",
  14. "    1.2   使用 paddle 搭建并训练模型\n",
  15. "\n",
  16. "  2   fastNLP 结合 jittor 训练模型\n",
  17. "\n",
  18. "    2.1   关于 jittor 的简单介绍\n",
  19. "\n",
  20. "    2.2   使用 jittor 搭建并训练模型\n",
  21. "\n",
  22. "  3   fastNLP 实现 paddle 与 pytorch 互转"
  23. ]
  24. },
  25. {
  26. "cell_type": "code",
  27. "execution_count": null,
  28. "id": "08752c5a",
  29. "metadata": {},
  30. "outputs": [],
  31. "source": [
  32. "from datasets import load_dataset\n",
  33. "\n",
  34. "sst2data = load_dataset('glue', 'sst2')"
  35. ]
  36. },
  37. {
  38. "cell_type": "code",
  39. "execution_count": null,
  40. "id": "7e8cc210",
  41. "metadata": {},
  42. "outputs": [],
  43. "source": [
  44. "import sys\n",
  45. "sys.path.append('..')\n",
  46. "\n",
  47. "from fastNLP import DataSet\n",
  48. "\n",
  49. "dataset = DataSet.from_pandas(sst2data['train'].to_pandas())[:6000]\n",
  50. "\n",
  51. "dataset.apply_more(lambda ins:{'words': ins['sentence'].lower().split(), 'target': ins['label']}, \n",
  52. " progress_bar=\"tqdm\")\n",
  53. "dataset.delete_field('sentence')\n",
  54. "dataset.delete_field('label')\n",
  55. "dataset.delete_field('idx')\n",
  56. "\n",
  57. "from fastNLP import Vocabulary\n",
  58. "\n",
  59. "vocab = Vocabulary()\n",
  60. "vocab.from_dataset(dataset, field_name='words')\n",
  61. "vocab.index_dataset(dataset, field_name='words')\n",
  62. "\n",
  63. "train_dataset, evaluate_dataset = dataset.split(ratio=0.85)\n",
  64. "print(type(train_dataset), isinstance(train_dataset, DataSet))\n",
  65. "\n",
  66. "from fastNLP.io import DataBundle\n",
  67. "\n",
  68. "data_bundle = DataBundle(datasets={'train': train_dataset, 'dev': evaluate_dataset})"
  69. ]
  70. },
  71. {
  72. "cell_type": "markdown",
  73. "id": "57a3272f",
  74. "metadata": {},
  75. "source": [
  76. "## 1. fastNLP 结合 paddle 训练模型\n",
  77. "\n",
  78. "```python\n",
  79. "import paddle\n",
  80. "\n",
  81. "lstm = paddle.nn.LSTM(16, 32, 2)\n",
  82. "\n",
  83. "x = paddle.randn((4, 23, 16))\n",
  84. "h = paddle.randn((2, 4, 32))\n",
  85. "c = paddle.randn((2, 4, 32))\n",
  86. "\n",
  87. "y, (h, c) = lstm(x, (h, c))\n",
  88. "\n",
  89. "print(y.shape) # [4, 23, 32]\n",
  90. "print(h.shape) # [2, 4, 32]\n",
  91. "print(c.shape) # [2, 4, 32]\n",
  92. "```"
  93. ]
  94. },
  95. {
  96. "cell_type": "code",
  97. "execution_count": null,
  98. "id": "e31b3198",
  99. "metadata": {},
  100. "outputs": [],
  101. "source": [
  102. "import paddle\n",
  103. "import paddle.nn as nn\n",
  104. "\n",
  105. "\n",
  106. "class ClsByPaddle(nn.Layer):\n",
  107. " def __init__(self, vocab_size, embedding_dim, output_dim, hidden_dim=64, num_layers=2, dropout=0.5):\n",
  108. " nn.Layer.__init__(self)\n",
  109. " self.hidden_dim = hidden_dim\n",
  110. "\n",
  111. " self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)\n",
  112. " # self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, \n",
  113. " # num_layers=num_layers, direction='bidirectional', dropout=dropout)\n",
  114. " self.mlp = nn.Sequential(('linear_1', nn.Linear(hidden_dim * 2, hidden_dim * 2)),\n",
  115. " ('activate', nn.ReLU()),\n",
  116. " ('linear_2', nn.Linear(hidden_dim * 2, output_dim)))\n",
  117. " \n",
  118. " self.loss_fn = nn.CrossEntropyLoss()\n",
  119. "\n",
  120. " def forward(self, words):\n",
  121. " output = self.embedding(words)\n",
  122. " # output, (hidden, cell) = self.lstm(output)\n",
  123. " hidden = paddle.randn((2, words.shape[0], self.hidden_dim))\n",
  124. " output = self.mlp(paddle.concat((hidden[-1], hidden[-2]), axis=1))\n",
  125. " return output\n",
  126. " \n",
  127. " def train_step(self, words, target):\n",
  128. " pred = self(words)\n",
  129. " return {\"loss\": self.loss_fn(pred, target)}\n",
  130. "\n",
  131. " def evaluate_step(self, words, target):\n",
  132. " pred = self(words)\n",
  133. " pred = paddle.max(pred, axis=-1)[1]\n",
  134. " return {\"pred\": pred, \"target\": target}"
  135. ]
  136. },
  137. {
  138. "cell_type": "code",
  139. "execution_count": null,
  140. "id": "c63b030f",
  141. "metadata": {},
  142. "outputs": [],
  143. "source": [
  144. "model = ClsByPaddle(vocab_size=len(vocab), embedding_dim=100, output_dim=2)\n",
  145. "\n",
  146. "model"
  147. ]
  148. },
  149. {
  150. "cell_type": "code",
  151. "execution_count": null,
  152. "id": "2997c0aa",
  153. "metadata": {},
  154. "outputs": [],
  155. "source": [
  156. "from paddle.optimizer import AdamW\n",
  157. "\n",
  158. "optimizers = AdamW(parameters=model.parameters(), learning_rate=1e-2)"
  159. ]
  160. },
  161. {
  162. "cell_type": "code",
  163. "execution_count": null,
  164. "id": "ead35fb8",
  165. "metadata": {},
  166. "outputs": [],
  167. "source": [
  168. "from fastNLP import prepare_paddle_dataloader\n",
  169. "\n",
  170. "# train_dataloader = prepare_paddle_dataloader(train_dataset, batch_size=16, shuffle=True)\n",
  171. "# evaluate_dataloader = prepare_paddle_dataloader(evaluate_dataset, batch_size=16)\n",
  172. "\n",
  173. "dl_bundle = prepare_paddle_dataloader(data_bundle, batch_size=16, shuffle=True)"
  174. ]
  175. },
  176. {
  177. "cell_type": "code",
  178. "execution_count": null,
  179. "id": "25e8da83",
  180. "metadata": {},
  181. "outputs": [],
  182. "source": [
  183. "from fastNLP import Trainer, Accuracy\n",
  184. "\n",
  185. "trainer = Trainer(\n",
  186. " model=model,\n",
  187. " driver='paddle',\n",
  188. " device='gpu', # 'cpu', 'gpu', 'gpu:x'\n",
  189. " n_epochs=10,\n",
  190. " optimizers=optimizers,\n",
  191. " train_dataloader=dl_bundle['train'], # train_dataloader,\n",
  192. " evaluate_dataloaders=dl_bundle['dev'], # evaluate_dataloader,\n",
  193. " metrics={'acc': Accuracy()}\n",
  194. ")"
  195. ]
  196. },
  197. {
  198. "cell_type": "code",
  199. "execution_count": null,
  200. "id": "d63c5d74",
  201. "metadata": {},
  202. "outputs": [],
  203. "source": [
  204. "trainer.run(num_eval_batch_per_dl=10) # 然后卡了?"
  205. ]
  206. },
  207. {
  208. "cell_type": "markdown",
  209. "id": "cb9a0b3c",
  210. "metadata": {},
  211. "source": [
  212. "## 2. fastNLP 结合 jittor 训练模型"
  213. ]
  214. },
  215. {
  216. "cell_type": "code",
  217. "execution_count": null,
  218. "id": "c600191d",
  219. "metadata": {},
  220. "outputs": [],
  221. "source": [
  222. "import jittor\n",
  223. "import jittor.nn as nn\n",
  224. "\n",
  225. "from jittor import Module\n",
  226. "\n",
  227. "\n",
  228. "class ClsByJittor(Module):\n",
  229. " def __init__(self, vocab_size, embedding_dim, output_dim, hidden_dim=64, num_layers=2, dropout=0.5):\n",
  230. " Module.__init__(self)\n",
  231. " self.hidden_dim = hidden_dim\n",
  232. "\n",
  233. " self.embedding = nn.Embedding(num=vocab_size, dim=embedding_dim)\n",
  234. " self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, \n",
  235. " num_layers=num_layers, bidirectional=True, dropout=dropout)\n",
  236. " self.mlp = nn.Sequential([nn.Linear(hidden_dim * 2, hidden_dim * 2),\n",
  237. " nn.ReLU(),\n",
  238. " nn.Linear(hidden_dim * 2, output_dim)])\n",
  239. "\n",
  240. " self.loss_fn = nn.BCELoss()\n",
  241. "\n",
  242. " def execute(self, words):\n",
  243. " output = self.embedding(words)\n",
  244. " output, (hidden, cell) = self.lstm(output)\n",
  245. " # hidden = jittor.randn((2, words.shape[0], self.hidden_dim))\n",
  246. " output = self.mlp(jittor.concat((hidden[-1], hidden[-2]), axis=1))\n",
  247. " return output\n",
  248. " \n",
  249. " def train_step(self, words, target):\n",
  250. " pred = self(words)\n",
  251. " return {\"loss\": self.loss_fn(pred, target)}\n",
  252. "\n",
  253. " def evaluate_step(self, words, target):\n",
  254. " pred = self(words)\n",
  255. " pred = jittor.max(pred, axis=-1)[1]\n",
  256. " return {\"pred\": pred, \"target\": target}"
  257. ]
  258. },
  259. {
  260. "cell_type": "code",
  261. "execution_count": null,
  262. "id": "a94ed8c4",
  263. "metadata": {},
  264. "outputs": [],
  265. "source": [
  266. "model = ClsByJittor(vocab_size=len(vocab), embedding_dim=100, output_dim=2)\n",
  267. "\n",
  268. "model"
  269. ]
  270. },
  271. {
  272. "cell_type": "code",
  273. "execution_count": null,
  274. "id": "6d15ebc1",
  275. "metadata": {},
  276. "outputs": [],
  277. "source": [
  278. "from jittor.optim import AdamW\n",
  279. "\n",
  280. "optimizers = AdamW(params=model.parameters(), lr=1e-2)"
  281. ]
  282. },
  283. {
  284. "cell_type": "code",
  285. "execution_count": null,
  286. "id": "95d8d09e",
  287. "metadata": {},
  288. "outputs": [],
  289. "source": [
  290. "from fastNLP import prepare_jittor_dataloader\n",
  291. "\n",
  292. "# train_dataloader = prepare_jittor_dataloader(train_dataset, batch_size=16, shuffle=True)\n",
  293. "# evaluate_dataloader = prepare_jittor_dataloader(evaluate_dataset, batch_size=16)\n",
  294. "\n",
  295. "dl_bundle = prepare_jittor_dataloader(data_bundle, batch_size=16, shuffle=True)"
  296. ]
  297. },
  298. {
  299. "cell_type": "code",
  300. "execution_count": null,
  301. "id": "917eab81",
  302. "metadata": {},
  303. "outputs": [],
  304. "source": [
  305. "from fastNLP import Trainer, Accuracy\n",
  306. "\n",
  307. "trainer = Trainer(\n",
  308. " model=model,\n",
  309. " driver='jittor',\n",
  310. " device='gpu', # 'cpu', 'gpu', 'cuda'\n",
  311. " n_epochs=10,\n",
  312. " optimizers=optimizers,\n",
  313. " train_dataloader=dl_bundle['train'], # train_dataloader,\n",
  314. " evaluate_dataloaders=dl_bundle['dev'], # evaluate_dataloader,\n",
  315. " metrics={'acc': Accuracy()}\n",
  316. ")"
  317. ]
  318. },
  319. {
  320. "cell_type": "code",
  321. "execution_count": null,
  322. "id": "f7c4ac5a",
  323. "metadata": {},
  324. "outputs": [],
  325. "source": [
  326. "trainer.run(num_eval_batch_per_dl=10)"
  327. ]
  328. }
  329. ],
  330. "metadata": {
  331. "kernelspec": {
  332. "display_name": "Python 3 (ipykernel)",
  333. "language": "python",
  334. "name": "python3"
  335. },
  336. "language_info": {
  337. "codemirror_mode": {
  338. "name": "ipython",
  339. "version": 3
  340. },
  341. "file_extension": ".py",
  342. "mimetype": "text/x-python",
  343. "name": "python",
  344. "nbconvert_exporter": "python",
  345. "pygments_lexer": "ipython3",
  346. "version": "3.7.13"
  347. }
  348. },
  349. "nbformat": 4,
  350. "nbformat_minor": 5
  351. }