|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101 |
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'应当为一个字符串,其值应当为以下之一:``[None, \"dist\", \"unrepeatdist\"]``;为 ``None`` 时,表示不需要考虑当前 ``dataloader`` 切换为分布式状态;为 ``\"dist\"`` 时,表示该 ``dataloader`` 应该保证每个 ``gpu`` 上返回的 ``batch`` 的数量是一样多的,允许出现少量 ``sample`` ,在 不同 ``gpu`` 上出现重复;为 ``\"unrepeatdist\"`` 时,表示该 ``dataloader`` 应该保证所有 ``gpu`` 上迭代出来的数据合并起来应该刚好等于原始的 数据,允许不同 ``gpu`` 上 ``batch`` 的数量不一致。其中 ``trainer`` 中 ``kwargs`` 的参数 ``use_dist_sampler`` 为 ``True`` 时,该值为 ``\"dist\"``; 否则为 ``None`` ,``evaluator`` 中的 ``kwargs`` 的参数 ``use_dist_sampler`` 为 ``True`` 时,该值为 ``\"unrepeatdist\"``,否则为 ``None``; 注意当 ``dist`` 为 ``ReproducibleSampler, ReproducibleBatchSampler`` 时,是断点重训加载时 ``driver.load`` 函数在调用; 当 ``dist`` 为 ``str`` 或者 ``None`` 时,是 ``trainer`` 在初始化时调用该函数;'"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import re\n",
- "import sys\n",
- "sys.path.append(\"../\")\n",
- "# import fastNLP\n",
- "\n",
- "def get_class(text):\n",
- "    \"\"\"Return ``text`` formatted as a ``:class:`` cross-reference role with a ``~`` prefix.\"\"\"\n",
- "    return \":class:`~{}`\".format(text)\n",
- "\n",
- "def get_meth(text):\n",
- "    \"\"\"Return ``text`` formatted as a ``:meth:`` cross-reference role with a ``~`` prefix.\"\"\"\n",
- "    return \":meth:`~{}`\".format(text)\n",
- "\n",
- "def get_module(text):\n",
- "    \"\"\"Return ``text`` formatted as a ``:mod:`` cross-reference role with a ``~`` prefix.\"\"\"\n",
- "    return \":mod:`~{}`\".format(text)\n",
- "\n",
- "def replace(matched):\n",
- "    \"\"\"\n",
- "    ``re.sub`` callback that wraps the matched run in double backticks.\n",
- "\n",
- "    Whitespace around the run stays outside the backticks. Backticks already\n",
- "    present at either end are topped up to two instead of being stacked, so a\n",
- "    run that is already wrapped in double backticks comes back unchanged. A run\n",
- "    containing no letter, digit or underscore (blank, bare punctuation, stray\n",
- "    backticks) is returned untouched.\n",
- "\n",
- "    :param matched: match object supplied by ``re.sub``\n",
- "    :return: replacement text for the matched span\n",
- "    \"\"\"\n",
- "    text = matched.group()\n",
- "    non_space = text.strip()\n",
- "    # Nothing word-like to mark up: leave blank or punctuation-only runs alone.\n",
- "    if not any(ch.isalnum() or ch == \"_\" for ch in non_space):\n",
- "        return text\n",
- "    # Count the backticks already typed at each end so that an existing\n",
- "    # ``literal`` is not turned into ```literal```.\n",
- "    lead = len(non_space) - len(non_space.lstrip(\"`\"))\n",
- "    trail = len(non_space) - len(non_space.rstrip(\"`\"))\n",
- "    res = \"`\" * max(0, 2 - lead) + non_space + \"`\" * max(0, 2 - trail)\n",
- "    # Only whitespace surrounds non_space inside text, so it occurs exactly once.\n",
- "    return text.replace(non_space, res, 1)\n",
- "\n",
- "def transfer(text):\n",
- "    \"\"\"\n",
- "    Add double backticks around the English words found in ``text``.\n",
- "\n",
- "    Each maximal run of ASCII letters, underscores, spaces, dots, commas,\n",
- "    quotes, square brackets and backticks is passed to ``replace``. Digits are\n",
- "    not in the character class, so a run ends at a digit. It is best to check\n",
- "    the result by hand afterwards.\n",
- "    \"\"\"\n",
- "    # Matches letters, underscore, space, dot, comma, both quote kinds,\n",
- "    # square brackets and the backtick.\n",
- "    word_run = re.compile(r\"[a-zA-Z_ \\.,\\\"\\'\\[\\]`]+\")\n",
- "    return word_run.sub(replace, text)\n",
- "\n",
- "\n",
- "# Sample input for transfer(); the bare call on the last line makes the result the cell's displayed value.\n",
- "text = '应当为一个字符串,其值应当为以下之一:[None, \"dist\", \"unrepeatdist\"];为 None 时,表示不需要考虑当前 dataloader \\\n",
- " 切换为分布式状态;为 \"dist\" 时,表示该 dataloader 应该保证每个 gpu 上返回的 batch 的数量是一样多的,允许出现少量 sample ,在 \\\n",
- " 不同 gpu 上出现重复;为 \"unrepeatdist\" 时,表示该 dataloader 应该保证所有 gpu 上迭代出来的数据合并起来应该刚好等于原始的 \\\n",
- " 数据,允许不同 gpu 上 batch 的数量不一致。其中 trainer 中 kwargs 的参数 `use_dist_sampler` 为 True 时,该值为 \"dist\"; \\\n",
- " 否则为 None ,evaluator 中的 kwargs 的参数 `use_dist_sampler` 为 True 时,该值为 \"unrepeatdist\",否则为 None; \\\n",
- " 注意当 dist 为 ReproducibleSampler, ReproducibleBatchSampler 时,是断点重训加载时 driver.load 函数在调用; \\\n",
- " 当 dist 为 str 或者 None 时,是 trainer 在初始化时调用该函数;'\n",
- "transfer(text)"
- ]
- }
- ],
- "metadata": {
- "interpreter": {
- "hash": "c79c3370938623706c2d55a7989cf7c7c31ff0346157477d22565bb370580b77"
- },
- "kernelspec": {
- "display_name": "Python 3.7.13 ('fnlp')",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.13"
- },
- "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
|