
model.py

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F


def pack_sequence(tensor_seq, padding_value=0.0):
    """Pad a list of variable-length tensors (along dim 0) into one batch tensor."""
    if len(tensor_seq) <= 0:
        return
    length = [v.size(0) for v in tensor_seq]
    max_len = max(length)
    size = [len(tensor_seq), max_len]
    size.extend(list(tensor_seq[0].size()[1:]))
    ans = torch.Tensor(*size).fill_(padding_value)
    if tensor_seq[0].data.is_cuda:
        ans = ans.cuda()
    ans = Variable(ans)
    for i, v in enumerate(tensor_seq):
        ans[i, :length[i], :] = v
    return ans


class HAN(nn.Module):
    """Hierarchical Attention Network: word-level attention builds sentence vectors,
    sentence-level attention builds the document vector used for classification."""

    def __init__(self, input_size, output_size,
                 word_hidden_size, word_num_layers, word_context_size,
                 sent_hidden_size, sent_num_layers, sent_context_size):
        super(HAN, self).__init__()
        self.word_layer = AttentionNet(input_size,
                                       word_hidden_size,
                                       word_num_layers,
                                       word_context_size)
        self.sent_layer = AttentionNet(2 * word_hidden_size,
                                       sent_hidden_size,
                                       sent_num_layers,
                                       sent_context_size)
        self.output_layer = nn.Linear(2 * sent_hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, batch_doc):
        # input is a sequence of matrices, one per document
        doc_vec_list = []
        for doc in batch_doc:
            # doc's dim (num_sent, seq_len, word_dim)
            sent_mat = self.word_layer(doc)
            # sent_mat's dim (num_sent, vec_dim)
            doc_vec_list.append(sent_mat)
        doc_vec = self.sent_layer(pack_sequence(doc_vec_list))
        output = self.softmax(self.output_layer(doc_vec))
        return output


class AttentionNet(nn.Module):
    """Bidirectional GRU encoder followed by attention pooling over the time dimension."""

    def __init__(self, input_size, gru_hidden_size, gru_num_layers, context_vec_size):
        super(AttentionNet, self).__init__()
        self.input_size = input_size
        self.gru_hidden_size = gru_hidden_size
        self.gru_num_layers = gru_num_layers
        self.context_vec_size = context_vec_size

        # Encoder
        self.gru = nn.GRU(input_size=input_size,
                          hidden_size=gru_hidden_size,
                          num_layers=gru_num_layers,
                          batch_first=True,
                          bidirectional=True)
        # Attention
        self.fc = nn.Linear(2 * gru_hidden_size, context_vec_size)
        self.tanh = nn.Tanh()
        self.softmax = nn.Softmax(dim=1)
        # context vector
        self.context_vec = nn.Parameter(torch.Tensor(context_vec_size, 1))
        self.context_vec.data.uniform_(-0.1, 0.1)

    def forward(self, inputs):
        # inputs' dim (batch_size, seq_len, word_dim)
        h_t, hidden = self.gru(inputs)
        u = self.tanh(self.fc(h_t))
        # u's dim (batch_size, seq_len, context_vec_size)
        alpha = self.softmax(torch.matmul(u, self.context_vec))
        # alpha's dim (batch_size, seq_len, 1)
        output = torch.bmm(torch.transpose(h_t, 1, 2), alpha)
        # output's dim (batch_size, 2*hidden_size, 1)
        return torch.squeeze(output, dim=2)


if __name__ == '__main__':
    import numpy as np

    use_cuda = True
    net = HAN(input_size=200, output_size=5,
              word_hidden_size=50, word_num_layers=1, word_context_size=100,
              sent_hidden_size=50, sent_num_layers=1, sent_context_size=100)
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    criterion = nn.NLLLoss()
    test_time = 10
    batch_size = 64
    if use_cuda:
        net.cuda()
    print('test training')
    for step in range(test_time):
        # each document: random number of sentences, 200 words per sentence, 200-dim embeddings
        x_data = [torch.randn(np.random.randint(1, 10), 200, 200) for i in range(batch_size)]
        y_data = torch.LongTensor([np.random.randint(0, 5) for i in range(batch_size)])
        if use_cuda:
            x_data = [x_i.cuda() for x_i in x_data]
            y_data = y_data.cuda()
        x = [Variable(x_i) for x_i in x_data]
        y = Variable(y_data)
        predict = net(x)
        loss = criterion(predict, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(loss.data[0])  # pre-0.4 scalar access; on newer PyTorch use loss.item()
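
For reference, a minimal inference sketch (not part of model.py): it assumes a recent PyTorch (>= 0.4, where Variable is a no-op wrapper) and a document already converted to a (num_sent, seq_len, word_dim) tensor of word embeddings, the same layout the test block above generates with torch.randn. The helper name predict_doc is hypothetical.

import torch

def predict_doc(net, doc, use_cuda=False):
    # doc: FloatTensor of shape (num_sent, seq_len, word_dim) holding the word
    # embeddings of one document; HAN.forward expects a list of such tensors.
    if use_cuda:
        doc = doc.cuda()
    net.eval()
    with torch.no_grad():
        log_probs = net([doc])                 # (1, output_size) log-probabilities
        return log_probs.argmax(dim=1).item()  # index of the most likely class

# Hypothetical usage, with the same sizes as the test block above:
# net = HAN(input_size=200, output_size=5,
#           word_hidden_size=50, word_num_layers=1, word_context_size=100,
#           sent_hidden_size=50, sent_num_layers=1, sent_context_size=100)
# doc = torch.randn(3, 20, 200)  # 3 sentences, 20 words each, 200-dim embeddings
# print(predict_doc(net, doc))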

A lightweight natural language processing (NLP) toolkit that aims to reduce the amount of engineering code in user projects, such as data-processing loops, training loops, and multi-GPU execution.