You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

model.py 4.5 kB

2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. # Copyright (c) Microsoft Corporation.
  2. # Licensed under the MIT license.
  3. import numpy as np
  4. import torch
  5. import torch.nn as nn
  6. from pytorch import mutables
  7. from ops import ConvBN, LinearCombine, AvgPool, MaxPool, RNN, Attention, BatchNorm
  8. from utils import GlobalMaxPool, GlobalAvgPool
  class Layer(mutables.MutableScope):
      """Searchable layer of the network, registered as a mutable scope under ``key``.

      Each layer carries three mutables: ``prec`` (an ``InputChoice`` over the
      most recent earlier layers, ``n_chosen=1``), ``op`` (a ``LayerChoice``
      over convolution, pooling, RNN and attention candidates) and
      ``skipconnect`` (an ``InputChoice`` over all earlier layers). The result
      is always passed through ``BatchNorm`` before being returned.
      """

      def __init__(self,
                   key,
                   prev_keys,
                   hidden_units,
                   choose_from_k,
                   cnn_keep_prob,
                   lstm_keep_prob,
                   att_keep_prob,
                   att_mask):
          """Build the input choice, the candidate operations and the skip choice.

          Args:
              key: Scope key handed to ``MutableScope``.
              prev_keys: Keys of the layers created before this one; empty for
                  the first layer.
              hidden_units: Size handed to every candidate op and to the final
                  ``BatchNorm``.
              choose_from_k: The input choice only sees
                  ``prev_keys[-choose_from_k:]``.
              cnn_keep_prob: Forwarded to the ``ConvBN`` candidates.
              lstm_keep_prob: Forwarded to the ``RNN`` candidate.
              att_keep_prob: Forwarded to the ``Attention`` candidate.
              att_mask: Forwarded to the ``Attention`` candidate.
          """
          super(Layer, self).__init__(key)

          self.n_candidates = len(prev_keys)
          has_predecessors = bool(self.n_candidates)

          # The first layer has nothing to choose from, so it gets no input choice.
          self.prec = (
              mutables.InputChoice(choose_from=prev_keys[-choose_from_k:], n_chosen=1)
              if has_predecessors
              else None
          )

          # Candidates are created in a fixed order: four convolutions
          # (kernel sizes 1, 3, 5, 7), two poolings, the RNN, then attention.
          candidate_ops = [
              ConvBN(width, hidden_units, hidden_units, cnn_keep_prob, False, True)
              for width in (1, 3, 5, 7)
          ]
          candidate_ops.append(AvgPool(3, False, True))
          candidate_ops.append(MaxPool(3, False, True))
          candidate_ops.append(RNN(hidden_units, lstm_keep_prob))
          candidate_ops.append(Attention(hidden_units, 4, att_keep_prob, att_mask))
          self.op = mutables.LayerChoice(candidate_ops)

          # Skip connections may come from any earlier layer, not only the last k.
          self.skipconnect = (
              mutables.InputChoice(choose_from=prev_keys)
              if has_predecessors
              else None
          )

          self.bn = BatchNorm(hidden_units, False, True)

      def forward(self, last_layer, prev_layers, mask):
          """Run the layer on the chosen input and add the chosen skip connection.

          Args:
              last_layer: Output of the preceding stage; used directly as the
                  input when this layer has no input choice (layer 0, where
                  ``prev_layers`` is empty).
              prev_layers: Outputs of all earlier layers, oldest first.
              mask: Mask forwarded to the chosen op and to the batch norm.

          Returns:
              The batch-normalised output of this layer.
          """
          if self.prec is not None:
              # Only the trailing window of earlier outputs is offered to the choice.
              window = prev_layers[-self.prec.n_candidates:]
              source = self.prec(window)
          else:
              source = last_layer

          out = self.op(source, mask)

          if self.skipconnect is not None:
              residual = self.skipconnect(prev_layers[-self.skipconnect.n_candidates:])
              # The skip choice may yield nothing, in which case no residual is added.
              if residual is not None:
                  out += residual

          return self.bn(out, mask)
  class Model(nn.Module):
      """Text classifier built from a stack of searchable ``Layer`` blocks.

      Token ids are embedded, projected to ``hidden_units`` by an initial
      ``ConvBN``, passed through ``num_layers`` layers, merged with
      ``LinearCombine``, pooled globally and mapped to ``num_classes`` outputs
      by a linear layer.
      """

      def __init__(self, embedding, hidden_units=256, num_layers=24, num_classes=5, choose_from_k=5,
                   lstm_keep_prob=0.5, cnn_keep_prob=0.5, att_keep_prob=0.5, att_mask=True,
                   embed_keep_prob=0.5, final_output_keep_prob=1.0, global_pool="avg"):
          """Create the embedding, the searchable layers and the output head.

          Args:
              embedding: Pretrained embedding weights, loaded with
                  ``nn.Embedding.from_pretrained(..., freeze=False)`` so they
                  remain trainable.
              hidden_units: Size used by every layer and as the input size of
                  the final linear layer.
              num_layers: Number of ``Layer`` blocks to stack.
              num_classes: Output size of the final linear layer.
              choose_from_k: Forwarded to every ``Layer``.
              lstm_keep_prob: Forwarded to every ``Layer``.
              cnn_keep_prob: Forwarded to the initial ``ConvBN`` and to every
                  ``Layer``.
              att_keep_prob: Forwarded to every ``Layer``.
              att_mask: Forwarded to every ``Layer``.
              embed_keep_prob: Keep probability after the embedding; the
                  dropout rate is ``1 - embed_keep_prob``.
              final_output_keep_prob: Keep probability before the final linear
                  layer; the dropout rate is ``1 - final_output_keep_prob``.
              global_pool: ``"max"`` or ``"avg"``; selects ``GlobalMaxPool`` or
                  ``GlobalAvgPool``.

          Raises:
              ValueError: If ``global_pool`` is neither ``"max"`` nor ``"avg"``.
          """
          super(Model, self).__init__()

          # Validate up front with a real exception: an ``assert`` is stripped
          # under ``python -O``, which would leave ``self.global_pool`` unset and
          # only fail later inside ``forward``.
          if global_pool not in ("max", "avg"):
              raise ValueError(
                  "global_pool must be 'max' or 'avg', got {!r}".format(global_pool))

          # load word embedding
          self.embedding = nn.Embedding.from_pretrained(embedding, freeze=False)

          self.hidden_units = hidden_units
          self.num_layers = num_layers
          self.num_classes = num_classes

          # first layer: project the embedding size to hidden_units
          self.init_conv = ConvBN(1, self.embedding.embedding_dim, hidden_units, cnn_keep_prob, False, True)

          self.layers = nn.ModuleList()
          # Keys of the layers built so far, e.g. ['layer_0', 'layer_1']. The same
          # list object is passed to each Layer before its own key is appended, so
          # a Layer only ever sees the keys of the layers that precede it.
          candidate_keys_pool = []
          for layer_id in range(self.num_layers):
              k = "layer_{}".format(layer_id)
              self.layers.append(Layer(k, candidate_keys_pool, hidden_units, choose_from_k,
                                       cnn_keep_prob, lstm_keep_prob, att_keep_prob, att_mask))
              candidate_keys_pool.append(k)

          self.linear_combine = LinearCombine(self.num_layers)
          self.linear_out = nn.Linear(self.hidden_units, self.num_classes)

          self.embed_dropout = nn.Dropout(p=1 - embed_keep_prob)
          self.output_dropout = nn.Dropout(p=1 - final_output_keep_prob)

          # global_pool was validated above, so only the two known values remain.
          if global_pool == "max":
              self.global_pool = GlobalMaxPool()
          else:
              self.global_pool = GlobalAvgPool()

      def forward(self, inputs):
          """Compute the class outputs for a batch.

          Args:
              inputs: A pair ``(sent_ids, mask)``. ``sent_ids`` is cast to long
                  and used as embedding indices; ``mask`` is forwarded to the
                  convolution, every layer and the global pooling.

          Returns:
              The output of the final linear layer (presumably one row of
              ``num_classes`` scores per sample; confirm against the pooling
              implementation).
          """
          sent_ids, mask = inputs
          seq = self.embedding(sent_ids.long())
          seq = self.embed_dropout(seq)

          # from (batch_size, seq_len, feat_size) -> (batch_size, feat_size, seq_len),
          # i.e. (N, L, C) -> (N, C, L)
          seq = torch.transpose(seq, 1, 2)

          x = self.init_conv(seq, mask)
          prev_layers = []

          for layer in self.layers:
              # Each layer receives the latest output plus all earlier layer outputs.
              x = layer(x, prev_layers, mask)
              prev_layers.append(x)

          # Stack every layer output along a new leading dimension and merge them.
          x = self.linear_combine(torch.stack(prev_layers))
          x = self.global_pool(x, mask)
          x = self.output_dropout(x)
          x = self.linear_out(x)
          return x

一站式算法开发平台、高性能分布式深度学习框架、先进算法模型库、视觉模型炼知平台、数据可视化分析平台等一系列平台及工具，在模型高效分布式训练、数据处理和可视分析、模型炼知和轻量化等技术上形成独特优势，目前已为产学研等各领域近千家单位及个人提供AI应用赋能