You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

checksum.cpp 2.2 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. #include "megdnn/oprs.h"
  2. #include "test/common/checker.h"
  3. #include "test/cuda/fixture.h"
  4. using namespace megdnn;
  5. using namespace test;
  6. TEST_F(CUDA, CHECKSUM_FORWARD) {
  7. auto cuda_opr = handle_cuda()->create_operator<megdnn::Checksum>(),
  8. naive_opr = handle_naive()->create_operator<megdnn::Checksum>();
  9. std::mt19937 rng(std::random_device{}());
  10. for (size_t size : {3, 8, 12345, 1024 * 1024, 1024 * 1024 * 10}) {
  11. auto aligned_size = size + ((512 - size % 512) % 512);
  12. auto run = [&](megdnn::Checksum* opr, void* ptr, bool log_size) {
  13. TensorND tensor;
  14. tensor.reset_ptr(ptr);
  15. tensor.layout.init_contiguous_stride({size});
  16. tensor.layout.dtype = dtype::Byte();
  17. WorkspaceWrapper workspace(
  18. handle_cuda(), opr->get_workspace_in_bytes(tensor.layout));
  19. if (log_size) {
  20. printf("checksum(%zu): workspace=%zu\n", size,
  21. workspace.workspace().size);
  22. }
  23. return opr->exec(tensor, workspace.workspace());
  24. };
  25. std::vector<uint8_t> buf(aligned_size);
  26. for (size_t i = 0; i < size; ++i)
  27. buf[i] = rng();
  28. auto run_offsset = [&](size_t offset) {
  29. void* dev_ptr = megdnn_malloc(handle_cuda(), buf.size() + offset);
  30. void* dev_buf = static_cast<char*>(dev_ptr) + offset;
  31. Checksum::Result res_cuda[2], res_naive[2];
  32. for (int change_last = 0; change_last < 2; ++change_last) {
  33. if (change_last)
  34. ++buf[size - 1];
  35. megdnn_memcpy_H2D(handle_cuda(), dev_buf, buf.data(), size);
  36. res_cuda[change_last] = run(cuda_opr.get(), dev_buf, !change_last);
  37. res_naive[change_last] = run(naive_opr.get(), buf.data(), false);
  38. }
  39. megdnn_free(handle_cuda(), dev_ptr);
  40. ASSERT_EQ(res_naive[0], res_cuda[0]) << "failed for size " << size;
  41. ASSERT_EQ(res_naive[1], res_cuda[1]);
  42. ASSERT_NE(res_cuda[0], res_cuda[1]);
  43. };
  44. for (size_t i = 0; i < 8; ++i) {
  45. run_offsset(i);
  46. }
  47. }
  48. }
  49. // vim: syntax=cpp.doxygen