You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

transfer.ipynb 4.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 20,
  6. "metadata": {},
  7. "outputs": [
  8. {
  9. "data": {
  10. "text/plain": [
  11. "'应当为一个字符串,其值应当为以下之一:``[None, \"dist\", \"unrepeatdist\"]``;为 ``None`` 时,表示不需要考虑当前 ``dataloader`` 切换为分布式状态;为 ``\"dist\"`` 时,表示该 ``dataloader`` 应该保证每个 ``gpu`` 上返回的 ``batch`` 的数量是一样多的,允许出现少量 ``sample`` ,在 不同 ``gpu`` 上出现重复;为 ``\"unrepeatdist\"`` 时,表示该 ``dataloader`` 应该保证所有 ``gpu`` 上迭代出来的数据合并起来应该刚好等于原始的 数据,允许不同 ``gpu`` 上 ``batch`` 的数量不一致。其中 ``trainer`` 中 ``kwargs`` 的参数 ``use_dist_sampler`` 为 ``True`` 时,该值为 ``\"dist\"``; 否则为 ``None`` ,``evaluator`` 中的 ``kwargs`` 的参数 ``use_dist_sampler`` 为 ``True`` 时,该值为 ``\"unrepeatdist\"``,否则为 ``None``; 注意当 ``dist`` 为 ``ReproducibleSampler, ReproducibleBatchSampler`` 时,是断点重训加载时 ``driver.load`` 函数在调用; 当 ``dist`` 为 ``str`` 或者 ``None`` 时,是 ``trainer`` 在初始化时调用该函数;'"
  12. ]
  13. },
  14. "execution_count": 20,
  15. "metadata": {},
  16. "output_type": "execute_result"
  17. }
  18. ],
  19. "source": [
  20. "import re\n",
  21. "import sys\n",
  22. "sys.path.append(\"../\")\n",
  23. "# import fastNLP\n",
  24. "\n",
  25. "def get_class(text):\n",
  26. " return f\":class:`~{text}`\"\n",
  27. "\n",
  28. "def get_meth(text):\n",
  29. " return f\":meth:`~{text}`\"\n",
  30. "\n",
  31. "def get_module(text):\n",
  32. " return f\":mod:`~{text}`\"\n",
  33. "\n",
  34. "def replace(matched):\n",
  35. " \"\"\"\n",
  36. " \"\"\"\n",
  37. " text = matched.group()\n",
  38. " non_space = text.strip()\n",
  39. " if non_space == \"\":\n",
  40. " return text\n",
  41. " # 如果原本就添加了 `,那么只加一个\n",
  42. " if non_space.startswith(\"`\"):\n",
  43. " res = \"`\" + non_space\n",
  44. " else:\n",
  45. " res = \"``\" + non_space\n",
  46. " if non_space.endswith(\"`\"):\n",
  47. " res += \"`\"\n",
  48. " else:\n",
  49. " res += \"``\"\n",
  50. " return text.replace(non_space, f\"{res}\")\n",
  51. "\n",
  52. "def transfer(text):\n",
  53. " \"\"\"\n",
  54. " Add \"``\" around the English words in the input ``text``. Manually checking the result afterwards is recommended.\n",
  55. " \"\"\"\n",
  56. " res = re.sub(\n",
  57. " # Match letters, underscores, spaces, dots, commas, quotes, square brackets and `\n",
  58. " pattern=r\"[a-zA-Z_ \\.,\\\"\\'\\[\\]`]+\",\n",
  59. " repl=replace,\n",
  60. " string=text\n",
  61. " )\n",
  62. " return res\n",
  63. "\n",
  64. "\n",
  65. "text = '应当为一个字符串,其值应当为以下之一:[None, \"dist\", \"unrepeatdist\"];为 None 时,表示不需要考虑当前 dataloader \\\n",
  66. " 切换为分布式状态;为 \"dist\" 时,表示该 dataloader 应该保证每个 gpu 上返回的 batch 的数量是一样多的,允许出现少量 sample ,在 \\\n",
  67. " 不同 gpu 上出现重复;为 \"unrepeatdist\" 时,表示该 dataloader 应该保证所有 gpu 上迭代出来的数据合并起来应该刚好等于原始的 \\\n",
  68. " 数据,允许不同 gpu 上 batch 的数量不一致。其中 trainer 中 kwargs 的参数 `use_dist_sampler` 为 True 时,该值为 \"dist\"; \\\n",
  69. " 否则为 None ,evaluator 中的 kwargs 的参数 `use_dist_sampler` 为 True 时,该值为 \"unrepeatdist\",否则为 None; \\\n",
  70. " 注意当 dist 为 ReproducibleSampler, ReproducibleBatchSampler 时,是断点重训加载时 driver.load 函数在调用; \\\n",
  71. " 当 dist 为 str 或者 None 时,是 trainer 在初始化时调用该函数;'\n",
  72. "transfer(text)"
  73. ]
  74. }
  75. ],
  76. "metadata": {
  77. "interpreter": {
  78. "hash": "c79c3370938623706c2d55a7989cf7c7c31ff0346157477d22565bb370580b77"
  79. },
  80. "kernelspec": {
  81. "display_name": "Python 3.7.13 ('fnlp')",
  82. "language": "python",
  83. "name": "python3"
  84. },
  85. "language_info": {
  86. "codemirror_mode": {
  87. "name": "ipython",
  88. "version": 3
  89. },
  90. "file_extension": ".py",
  91. "mimetype": "text/x-python",
  92. "name": "python",
  93. "nbconvert_exporter": "python",
  94. "pygments_lexer": "ipython3",
  95. "version": "3.7.13"
  96. },
  97. "orig_nbformat": 4
  98. },
  99. "nbformat": 4,
  100. "nbformat_minor": 2
  101. }