utils.cpp 5.0 kB

/**
 * \file dnn/src/cuda/rnn/utils.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "src/cuda/rnn/utils.h"
#include "src/cuda/utils.h"

#include <cudnn.h>

namespace megdnn {
namespace cuda {
namespace rnn {
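
// The commented-out holder below targets the newer cuDNN 8 RNN API
// (cudnnGetRNNWeightSpaceSize / cudnnGetRNNTempSpaceSizes); it is kept for
// reference while the v6 path further down is the one actually in use.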
/*
RNNForwardDescHolder::RNNForwardDescHolder(
        Handle* handle, size_t seq_len, size_t batch_size, size_t hidden_size,
        size_t input_size, size_t proj_size, size_t num_layers, bool bidirectional,
        bool bias, DType dtype, cudnnRNNMode_t _mode, cudnnForwardMode_t _fwdMode)
        : mode(_mode), fwdMode(_fwdMode) {
    size_t D = bidirectional ? 2 : 1;
    // TODO: set dropout to 0 in inference mode
    dropout_desc.set_no_dropout(handle);
    // seq len is unified (not packed)
    // cuda_check(cudaMalloc((void**)&devSeqLengths, sizeof(int32_t) * batch_size));
    devSeqLengths = (int32_t*)malloc(sizeof(int32_t) * batch_size);
    for (size_t i = 0; i < batch_size; ++i)
        devSeqLengths[i] = seq_len;
    // proj size should be smaller than hidden size according to the cudnn api;
    // otherwise it is disabled
    proj_size = (proj_size > hidden_size || proj_size == 0) ? hidden_size : proj_size;
    rnn_desc.set(
            input_size, hidden_size, proj_size, num_layers, bidirectional, bias,
            dtype, mode, dropout_desc, handle);
    x_desc.set(batch_size, input_size, seq_len, devSeqLengths, dtype);
    y_desc.set(batch_size, D * proj_size, seq_len, devSeqLengths, dtype);
    h_desc.set_nd(TensorLayout(
            TensorShape{D * num_layers, batch_size, proj_size}, dtype));
    cudnn_check(cudnnGetRNNWeightSpaceSize(
            cudnn_handle(handle), rnn_desc.desc, &weight_size));
    cudnn_check(cudnnGetRNNTempSpaceSizes(
            cudnn_handle(handle), rnn_desc.desc, fwdMode, x_desc.desc,
            &workspace_size, &reserveSpace_size));
}

RNNForwardDescHolder::~RNNForwardDescHolder() {
    // cuda_check(cudaFree(devSeqLengths));
    free(devSeqLengths);
}
*/
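
// Descriptor bundle for the cuDNN v6-style RNN forward API: builds the RNN
// and dropout descriptors, one input/output descriptor per time step, the
// hidden/cell state descriptors, and queries the scratch-memory sizes.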
RNNForwardDescHolder_v6::RNNForwardDescHolder_v6(
        Handle* handle, size_t seq_len, size_t batch_size, size_t hidden_size,
        size_t input_size, size_t proj_size, size_t num_layers, bool bidirectional,
        bool bias, DType dtype, cudnnRNNMode_t _mode)
        : mode(_mode), seq_len(seq_len) {
    size_t D = bidirectional ? 2 : 1;
    // TODO: set dropout to 0 in inference mode
    dropout_desc.set_no_dropout(handle);
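    // cuDNN only accepts 0 < proj_size <= hidden_size (LSTM projection);
    // anything else falls back to hidden_size, i.e. projection disabled.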
    proj_size = (proj_size > hidden_size || proj_size == 0) ? hidden_size : proj_size;
    rnn_desc.set(
            input_size, hidden_size, proj_size, num_layers, bidirectional, bias, dtype,
            mode, dropout_desc, handle);
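    // The v6 API takes one tensor descriptor per time step; each step is a
    // (batch_size, input/output width) matrix described with a 3-D descriptor,
    // since the API requires at least three dimensions.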
    x_descs.resize(seq_len);
    y_descs.resize(seq_len);
    for (size_t i = 0; i < seq_len; ++i) {
        x_descs[i].set_nd(TensorLayout(TensorShape{batch_size, input_size}, dtype), 3);
        y_descs[i].set_nd(
                TensorLayout(TensorShape{batch_size, D * hidden_size}, dtype), 3);
    }
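    // Initial and final hidden/cell states all share the shape
    // {D * num_layers, batch_size, hidden_size}.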
#define SET_H(_var) \
    _var.set_nd(TensorLayout( \
            TensorShape{D * num_layers, batch_size, hidden_size}, dtype));

    SET_H(hx_desc)
    SET_H(cx_desc)
    SET_H(hy_desc)
    SET_H(cy_desc)
#undef SET_H
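    // Query scratch sizes: the workspace is needed by any forward pass, while
    // the reserve space must persist from forward to backward during training.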
    std::vector<cudnnTensorDescriptor_t> x_desc_arr = get_descs(x_descs);
    cudnn_check(cudnnGetRNNWorkspaceSize(
            cudnn_handle(handle), rnn_desc.desc, seq_len, x_desc_arr.data(),
            &workspace_size));
    cudnn_check(cudnnGetRNNTrainingReserveSize(
            cudnn_handle(handle), rnn_desc.desc, seq_len, x_desc_arr.data(),
            &reserveSpace_size));
}
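
// Translate a megdnn RNNForward::Param plus the input layout, whose shape is
// (seq_len, batch_size, input_size), into a fully initialized holder.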
RNNForwardDescHolder_v6 get_RNNDescHolder_v6(
        Handle* handle, megdnn::RNNForward::Param& _param, const TensorLayout& input) {
    size_t seq_len = input.shape[0];
    size_t batch_size = input.shape[1];
    size_t input_size = input.shape[2];

    cudnnRNNMode_t mode;
    using NonlineMode = param::RNN::NonlineMode;
    switch (_param.nonlineMode) {
        case NonlineMode::RELU:
            mode = CUDNN_RNN_RELU;
            break;
        case NonlineMode::TANH:
            mode = CUDNN_RNN_TANH;
            break;
        default:
            // previously fell through with `mode` left uninitialized; reject
            // nonlinearities that cuDNN's plain RNN modes do not support
            megdnn_throw("unsupported RNN nonlinear mode");
    }

    RNNForwardDescHolder_v6 desc_holder(
            handle, seq_len, batch_size, _param.hidden_size, input_size,
            _param.proj_size, _param.num_layers, _param.bidirectional, _param.bias,
            input.dtype, mode);
    return desc_holder;
}
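
// Flatten the TensorDesc wrappers into the plain array of raw
// cudnnTensorDescriptor_t handles that the v6 array-based calls expect.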
std::vector<cudnnTensorDescriptor_t> get_descs(const std::vector<TensorDesc>& descs) {
    std::vector<cudnnTensorDescriptor_t> r;
    r.reserve(descs.size());
    for (auto& desc : descs) {
        r.emplace_back(desc.desc);
    }
    return r;
}
} // namespace rnn
} // namespace cuda
} // namespace megdnn
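
/*
 * A minimal sketch (not part of this file) of how the holder would typically
 * drive cuDNN's v6 forward-training call. The pointers x, hx, cx, w, y, hy,
 * cy, workspace and reserve are illustrative placeholders for device buffers
 * sized by the queries above, and w_desc stands for a separately built filter
 * descriptor; the real call site lives in the RNN opr implementation.
 *
 *   auto holder = rnn::get_RNNDescHolder_v6(handle, param, input_layout);
 *   auto x_descs = rnn::get_descs(holder.x_descs);
 *   auto y_descs = rnn::get_descs(holder.y_descs);
 *   cudnn_check(cudnnRNNForwardTraining(
 *           cudnn_handle(handle), holder.rnn_desc.desc, holder.seq_len,
 *           x_descs.data(), x, holder.hx_desc.desc, hx, holder.cx_desc.desc,
 *           cx, w_desc, w, y_descs.data(), y, holder.hy_desc.desc, hy,
 *           holder.cy_desc.desc, cy, workspace, holder.workspace_size,
 *           reserve, holder.reserveSpace_size));
 */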