You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

chrome_timeline.cpp 21 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516
  1. /**
  2. * \file imperative/src/impl/profiler/chrome_timeline.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
  12. #include <unistd.h>
  13. #elif defined(_WIN32)
  14. #include <process.h>
  15. #else
  16. #error Unsupported platform
  17. #endif
  18. #include "nlohmann/json.hpp"
  19. #include "megbrain/imperative/utils/platform.h"
  20. #include "megbrain/utils/debug.h"
  21. #include "./formats.h"
  22. #include "./states.h"
  23. namespace mgb::imperative::profiler {
  24. class ChromeTraceEvent {
  25. public:
  26. ChromeTraceEvent& name(std::string name) {
  27. m_name = std::move(name);
  28. return *this;
  29. }
  30. ChromeTraceEvent& tid(uint64_t tid) {
  31. m_tid = std::move(tid);
  32. return *this;
  33. }
  34. ChromeTraceEvent& cat(std::string cat) {
  35. m_cat = std::move(cat);
  36. return *this;
  37. }
  38. ChromeTraceEvent& scope(std::string scope) {
  39. m_scope = std::move(scope);
  40. return *this;
  41. }
  42. ChromeTraceEvent& pid(uint64_t pid) {
  43. m_pid = pid;
  44. return *this;
  45. }
  46. ChromeTraceEvent& id(uint64_t id) {
  47. m_id = id;
  48. return *this;
  49. }
  50. ChromeTraceEvent& idx(uint64_t idx) {
  51. m_idx = idx;
  52. return *this;
  53. }
  54. template <typename TDuration>
  55. ChromeTraceEvent& ts(TDuration ts) {
  56. m_ts = std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(ts)
  57. .count();
  58. return *this;
  59. }
  60. ChromeTraceEvent& dur(uint64_t dur) {
  61. m_dur = dur;
  62. return *this;
  63. }
  64. ChromeTraceEvent& ph(char ph) {
  65. m_ph = ph;
  66. return *this;
  67. }
  68. ChromeTraceEvent& bp(char bp) {
  69. m_bp = bp;
  70. return *this;
  71. }
  72. ChromeTraceEvent& args(nlohmann::json args) {
  73. m_args = std::move(args);
  74. return *this;
  75. }
  76. ChromeTraceEvent& arg(std::string key, std::string value) {
  77. m_args[key] = value;
  78. return *this;
  79. }
  80. ChromeTraceEvent& arg(std::string key, double value) {
  81. m_args[key] = value;
  82. return *this;
  83. }
  84. ChromeTraceEvent& stack(Trace trace) {
  85. m_stack = std::move(trace);
  86. return *this;
  87. }
  88. nlohmann::json to_json() const {
  89. nlohmann::json result;
  90. auto prop_str = [&](auto key, auto value) {
  91. if (value.empty()) {
  92. return;
  93. }
  94. result[key] = value;
  95. };
  96. auto prop_num = [&](auto key, auto value) {
  97. if (!value) {
  98. return;
  99. }
  100. result[key] = value.value();
  101. };
  102. auto prop_char = [&](auto key, auto value) {
  103. if (!value) {
  104. return;
  105. }
  106. result[key] = std::string{} + value.value();
  107. };
  108. prop_str("name", m_name);
  109. prop_str("cat", m_cat);
  110. prop_str("scope", m_scope);
  111. prop_num("tid", m_tid);
  112. prop_num("pid", m_pid);
  113. prop_num("id", m_id);
  114. prop_num("idx", m_idx);
  115. prop_num("ts", m_ts);
  116. prop_num("dur", m_dur);
  117. prop_char("ph", m_ph);
  118. prop_char("bp", m_bp);
  119. if (!m_args.empty()) {
  120. result["args"] = m_args;
  121. }
  122. if (m_stack) {
  123. nlohmann::json stack;
  124. for (auto&& frame : m_stack->frames()) {
  125. stack.push_back(
  126. ssprintf("%s%ld", frame.node->name().c_str(), frame.version));
  127. }
  128. std::reverse(stack.begin(), stack.end());
  129. result["stack"] = stack;
  130. }
  131. return result;
  132. }
  133. private:
  134. std::string m_name;
  135. std::string m_cat;
  136. std::string m_scope;
  137. std::optional<uint64_t> m_tid;
  138. std::optional<uint64_t> m_pid;
  139. std::optional<uint64_t> m_id;
  140. std::optional<uint64_t> m_idx;
  141. std::optional<double> m_ts;
  142. std::optional<uint64_t> m_dur;
  143. std::optional<char> m_ph;
  144. std::optional<char> m_bp;
  145. nlohmann::json m_args;
  146. std::optional<Trace> m_stack;
  147. };
  148. class ChromeTraceEvents {
  149. public:
  150. ChromeTraceEvent& new_event() {
  151. m_content.emplace_back();
  152. return m_content.back();
  153. }
  154. std::string& metadata(std::string key) { return m_metadata[key]; }
  155. nlohmann::json to_json() const {
  156. nlohmann::json result;
  157. nlohmann::json event_list;
  158. nlohmann::json metadata;
  159. for (auto&& event : m_content) {
  160. event_list.push_back(event.to_json());
  161. }
  162. for (auto&& [key, value] : m_metadata) {
  163. metadata[key] = value;
  164. }
  165. result["traceEvents"] = event_list;
  166. result["metadata"] = metadata;
  167. return result;
  168. }
  169. std::string to_string() const {
  170. auto json = to_json();
  171. return "{"
  172. "\"traceEvents\":" +
  173. nlohmann::to_string(json["traceEvents"]) +
  174. ","
  175. "\"metadata\":" +
  176. nlohmann::to_string(json["metadata"]) + "}";
  177. }
  178. private:
  179. std::vector<ChromeTraceEvent> m_content;
  180. std::unordered_map<std::string, std::string> m_metadata;
  181. };
  182. struct ChromeTimelineEventVisitor : EventVisitor<ChromeTimelineEventVisitor> {
  183. ChromeTraceEvents trace_events;
  184. decltype(getpid()) pid = getpid();
  185. std::string pid_str = std::to_string(pid);
  186. ChromeTimelineEventVisitor() {}
  187. ChromeTraceEvent& new_event(
  188. std::string name, char ph, size_t tid, profiler::HostTime time) {
  189. return trace_events.new_event().name(name).ph(ph).pid(pid).tid(tid).ts(
  190. since_start(time));
  191. }
  192. ChromeTraceEvent& new_host_event(std::string name, char ph) {
  193. return trace_events.new_event()
  194. .name(name)
  195. .ph(ph)
  196. .pid(pid)
  197. .tid(to_tid(current->tid))
  198. .ts(since_start(current->time));
  199. }
  200. ChromeTraceEvent& new_cupti_event(
  201. std::string name, char ph, cupti::stream_t stream,
  202. cupti::time_point timestamp) {
  203. return new_event(name, ph, to_tid(stream), time_from_cupti(timestamp));
  204. }
  205. ChromeTraceEvent& new_device_event(std::string name, char ph, CompNode device) {
  206. auto time = since_start(to_device_time(current->time, device));
  207. return trace_events.new_event()
  208. .name(name)
  209. .ph(ph)
  210. .pid(pid)
  211. .tid(to_tid(device))
  212. .ts(time);
  213. }
  214. const char* to_cstr(TensorCommandKind kind) {
  215. switch (kind) {
  216. case TensorCommandKind::Put:
  217. return "Put";
  218. case TensorCommandKind::Drop:
  219. return "Drop";
  220. case TensorCommandKind::Del:
  221. return "Del";
  222. case TensorCommandKind::RecFree:
  223. return "RecFree";
  224. case TensorCommandKind::ReGen:
  225. return "ReGen";
  226. case TensorCommandKind::GetValue:
  227. return "GetValue";
  228. }
  229. return "UnknownCommand";
  230. }
  231. template <typename TEvent>
  232. void visit_event(const TEvent& event) {
  233. if constexpr (std::is_same_v<TEvent, OpDispatchEvent>) {
  234. new_host_event("OpDispatch", 'B');
  235. new_host_event(pid_str, 's')
  236. .cat("OpDispatch")
  237. .id(event.op_id)
  238. .scope(pid_str);
  239. new_host_event("OpDispatch", 'E').args(current_op->detail());
  240. } else if constexpr (std::is_same_v<TEvent, OpExecuteEvent>) {
  241. mgb_assert(event.op_id != 0);
  242. new_host_event(current_op->name, 'B');
  243. new_host_event(pid_str, 't')
  244. .cat("OpDispatch")
  245. .id(current_op->id)
  246. .scope(pid_str);
  247. } else if constexpr (std::is_same_v<TEvent, OpExecuteFinishEvent>) {
  248. new_host_event(current_op->name, 'E').args(current_op->detail());
  249. } else if constexpr (std::is_same_v<TEvent, KernelLaunchEvent>) {
  250. new_host_event(pid_str, 's')
  251. .id(event.kernel_id)
  252. .cat("KernelLaunch")
  253. .scope(pid_str);
  254. new_device_event(current_op->name, 'B', event.device).cat("Kernel");
  255. new_device_event(pid_str, 'f', event.device)
  256. .id(event.kernel_id)
  257. .bp('e')
  258. .cat("KernelLaunch")
  259. .scope(pid_str);
  260. } else if constexpr (std::is_same_v<TEvent, KernelLaunchFinishEvent>) {
  261. new_device_event(current_op->name, 'E', event.device)
  262. .cat("Kernel")
  263. .args(current_op->detail());
  264. } else if constexpr (std::is_same_v<TEvent, TensorProduceEvent>) {
  265. if (current_tensor->living_time == profiler::Duration::zero()) {
  266. new_host_event(pid_str, 's')
  267. .id(event.tensor_id)
  268. .cat("TensorLink")
  269. .scope(pid_str);
  270. } else {
  271. new_host_event(pid_str, 't')
  272. .id(event.tensor_id)
  273. .cat("TensorLink")
  274. .scope(pid_str);
  275. }
  276. } else if constexpr (std::is_same_v<TEvent, TensorUsageEvent>) {
  277. new_host_event(pid_str, 't')
  278. .id(event.tensor_id)
  279. .cat("TensorLink")
  280. .scope(pid_str);
  281. } else if constexpr (std::is_same_v<TEvent, TensorReleaseEvent>) {
  282. current_tensor->living_time += current->time - current_tensor->produced;
  283. current_tensor->produced = {};
  284. new_host_event(pid_str, 't')
  285. .id(event.tensor_id)
  286. .cat("TensorLink")
  287. .scope(pid_str);
  288. } else if constexpr (std::is_same_v<TEvent, TensorEraseEvent>) {
  289. if (current_tensor->living_time != profiler::Duration::zero()) {
  290. new_host_event(pid_str, 'f')
  291. .id(event.tensor_id)
  292. .bp('e')
  293. .cat("TensorLink")
  294. .scope(pid_str);
  295. }
  296. } else if constexpr (std::is_same_v<TEvent, TensorGetPropEvent>) {
  297. new_host_event("TensorGetProp", 'X')
  298. .dur(0)
  299. .args(current_tensor->detail(current->time))
  300. .arg("kind", imperative::to_string(event.prop));
  301. } else if constexpr (std::is_same_v<TEvent, TensorWaitPropEvent>) {
  302. new_host_event("TensorWaitProp", 'B');
  303. } else if constexpr (std::is_same_v<TEvent, TensorWaitPropFinishEvent>) {
  304. new_host_event(pid_str, 'f')
  305. .id(event.tensor_id)
  306. .bp('e')
  307. .cat("TensorProp")
  308. .scope(pid_str);
  309. new_host_event("TensorWaitProp", 'E')
  310. .args(current_tensor->detail(current->time));
  311. } else if constexpr (std::is_same_v<TEvent, TensorNotifyPropEvent>) {
  312. new_host_event(pid_str, 's')
  313. .id(event.tensor_id)
  314. .cat("TensorProp")
  315. .scope(pid_str);
  316. } else if constexpr (std::is_same_v<TEvent, SampleDeviceFinishEvent>) {
  317. std::string device_name = event.device.locator().to_string();
  318. new_host_event(ssprintf("%s_alloc_mem", device_name.c_str()), 'C')
  319. .arg("value", event.total_memory - event.free_memory);
  320. } else if constexpr (std::is_same_v<TEvent, TensorCommandEvent>) {
  321. new_host_event(
  322. ssprintf("%s %zu", to_cstr(event.kind), event.tensor_id), 'B');
  323. } else if constexpr (std::is_same_v<TEvent, TensorCommandFinishEvent>) {
  324. new_host_event(
  325. ssprintf("%s %zu", to_cstr(event.kind), event.tensor_id), 'E')
  326. .args(current_tensor->detail(current->time));
  327. } else if constexpr (std::is_same_v<TEvent, ScopeEvent>) {
  328. new_host_event(event.name, 'B');
  329. } else if constexpr (std::is_same_v<TEvent, ScopeFinishEvent>) {
  330. new_host_event(event.name, 'E');
  331. } else if constexpr (std::is_same_v<TEvent, OpInputEvent>) {
  332. new_host_event(ssprintf("Input %zu", event.tensor_id), 'B')
  333. .args(current_tensor->detail(current->time));
  334. } else if constexpr (std::is_same_v<TEvent, OpInputFinishEvent>) {
  335. new_host_event(ssprintf("Input %zu", event.tensor_id), 'E')
  336. .args(current_tensor->detail(current->time));
  337. } else if constexpr (std::is_same_v<TEvent, OpOutputEvent>) {
  338. new_host_event(ssprintf("Output %zu", event.tensor_id), 'B')
  339. .args(current_tensor->detail(current->time));
  340. } else if constexpr (std::is_same_v<TEvent, OpOutputFinishEvent>) {
  341. new_host_event(ssprintf("Output %zu", event.tensor_id), 'E')
  342. .args(current_tensor->detail(current->time));
  343. } else if constexpr (std::is_same_v<TEvent, StartProfileEvent>) {
  344. new_host_event("StartProfile", 'B');
  345. } else if constexpr (std::is_same_v<TEvent, StartProfileFinishEvent>) {
  346. new_host_event("StartProfile", 'E');
  347. } else if constexpr (std::is_same_v<TEvent, StopProfileEvent>) {
  348. new_host_event("StopProfile", 'B');
  349. } else if constexpr (std::is_same_v<TEvent, StopProfileFinishEvent>) {
  350. new_host_event("StopProfile", 'E');
  351. } else if constexpr (std::is_same_v<TEvent, CustomEvent>) {
  352. new_host_event(event.title, 'B');
  353. if (event.device.valid()) {
  354. new_device_event(event.title, 'B', event.device);
  355. }
  356. } else if constexpr (std::is_same_v<TEvent, CustomFinishEvent>) {
  357. new_host_event(event.title, 'E');
  358. if (event.device.valid()) {
  359. new_device_event(event.title, 'E', event.device);
  360. }
  361. } else if constexpr (std::is_same_v<TEvent, AutoEvictEvent>) {
  362. new_host_event("AutoEvict", 'B');
  363. } else if constexpr (std::is_same_v<TEvent, AutoEvictFinishEvent>) {
  364. new_host_event("AutoEvict", 'E');
  365. } else if constexpr (std::is_same_v<TEvent, HostToDeviceEvent>) {
  366. new_device_event("HostToDevice", 'B', event.device);
  367. } else if constexpr (std::is_same_v<TEvent, HostToDeviceFinishEvent>) {
  368. new_device_event("HostToDevice", 'E', event.device)
  369. .arg("shape", event.layout.TensorShape::to_string())
  370. .arg("dtype", event.layout.dtype.name())
  371. .arg("nr_elements", event.layout.total_nr_elems())
  372. .arg("device", event.device.to_string());
  373. } else if constexpr (std::is_same_v<TEvent, RecordDeviceEvent>) {
  374. auto current_host_time = current->time;
  375. auto current_device_time =
  376. to_device_time(current->time, event.event->comp_node());
  377. auto device_ahead = std::chrono::duration_cast<std::chrono::milliseconds>(
  378. current_device_time - current_host_time);
  379. new_host_event("device_ahead_ms", 'C').arg("value", device_ahead.count());
  380. } else if constexpr (std::is_same_v<TEvent, CUPTIKernelLaunchEvent>) {
  381. new_host_event(demangle(event.name), 'B');
  382. new_host_event(pid_str, 's')
  383. .id(event.correlation_id)
  384. .cat("KernelLink")
  385. .scope(pid_str);
  386. } else if constexpr (std::is_same_v<TEvent, CUPTIKernelLaunchFinishEvent>) {
  387. new_host_event(demangle(event.name), 'E');
  388. } else if constexpr (std::is_same_v<TEvent, CUPTIKernelExecuteEvent>) {
  389. new_cupti_event(demangle(event.name), 'B', event.stream, event.start)
  390. .arg("execution_time", (event.end - event.start).count());
  391. new_cupti_event(pid_str, 'f', event.stream, event.end)
  392. .id(event.correlation_id)
  393. .bp('e')
  394. .cat("KernelLink")
  395. .scope(pid_str);
  396. new_cupti_event(demangle(event.name), 'E', event.stream, event.end)
  397. .arg("execution_time", (event.end - event.start).count());
  398. } else if constexpr (std::is_same_v<TEvent, CUPTIMemcpyLaunchEvent>) {
  399. new_host_event("Memcpy", 'B');
  400. new_host_event(pid_str, 's')
  401. .id(event.correlation_id)
  402. .cat("CUPTILink")
  403. .scope(pid_str);
  404. } else if constexpr (std::is_same_v<TEvent, CUPTIMemcpyLaunchFinishEvent>) {
  405. new_host_event("Memcpy", 'E');
  406. } else if constexpr (std::is_same_v<TEvent, CUPTIMemcpyEvent>) {
  407. auto memkind2str = [](uint8_t kind) {
  408. const char* const valid_kinds[] = {
  409. "CUPTI_ACTIVITY_MEMORY_KIND_UNKNOWN",
  410. "CUPTI_ACTIVITY_MEMORY_KIND_PAGEABLE",
  411. "CUPTI_ACTIVITY_MEMORY_KIND_PINNED",
  412. "CUPTI_ACTIVITY_MEMORY_KIND_DEVICE",
  413. "CUPTI_ACTIVITY_MEMORY_KIND_ARRAY",
  414. "CUPTI_ACTIVITY_MEMORY_KIND_MANAGED",
  415. "CUPTI_ACTIVITY_MEMORY_KIND_DEVICE_STATIC",
  416. "CUPTI_ACTIVITY_MEMORY_KIND_MANAGED_STATIC"};
  417. if (kind > (sizeof(valid_kinds) / sizeof(const char*))) {
  418. return "invalid";
  419. }
  420. return valid_kinds[kind];
  421. };
  422. new_cupti_event("Memcpy", 'B', event.stream, event.start)
  423. .arg("bytes", imperative::to_string(event.bytes))
  424. .arg("src_kind", memkind2str(event.src_kind))
  425. .arg("dst_kind", memkind2str(event.dst_kind));
  426. new_cupti_event(pid_str, 'f', event.stream, event.start)
  427. .id(event.correlation_id)
  428. .bp('e')
  429. .cat("CUPTILink")
  430. .scope(pid_str);
  431. new_cupti_event("Memcpy", 'E', event.stream, event.end)
  432. .arg("bytes", imperative::to_string(event.bytes))
  433. .arg("src_kind", memkind2str(event.src_kind))
  434. .arg("dst_kind", memkind2str(event.dst_kind));
  435. } else if constexpr (std::is_same_v<TEvent, CUPTIMemsetEvent>) {
  436. new_cupti_event("Memset", 'B', event.stream, event.start)
  437. .arg("value", imperative::to_string(event.value))
  438. .arg("bytes", imperative::to_string(event.bytes));
  439. new_cupti_event("Memset", 'E', event.stream, event.start)
  440. .arg("value", imperative::to_string(event.value))
  441. .arg("bytes", imperative::to_string(event.bytes));
  442. } else if constexpr (std::is_same_v<TEvent, CUPTIRuntimeEvent>) {
  443. new_host_event(event.name, 'B');
  444. } else if constexpr (std::is_same_v<TEvent, CUPTIRuntimeFinishEvent>) {
  445. new_host_event(event.name, 'E');
  446. } else if constexpr (std::is_same_v<TEvent, CUPTIDriverEvent>) {
  447. new_host_event(event.name, 'B');
  448. new_host_event(pid_str, 's')
  449. .id(event.correlation_id)
  450. .cat("CUPTILink")
  451. .scope(pid_str);
  452. } else if constexpr (std::is_same_v<TEvent, CUPTIDriverFinishEvent>) {
  453. new_host_event(event.name, 'E');
  454. }
  455. }
  456. void notify_counter(std::string key, int64_t old_val, int64_t new_val) {
  457. new_host_event(key, 'C').arg("value", new_val);
  458. }
  459. void name_threads(Profiler::thread_dict_t thread_dict) {
  460. for (auto&& host : host_threads()) {
  461. if (thread_dict.count(host)) {
  462. trace_events.new_event()
  463. .name("thread_name")
  464. .ph('M')
  465. .pid(pid)
  466. .tid(to_tid(host))
  467. .arg("name", thread_dict.at(host));
  468. }
  469. }
  470. for (auto&& device : devices()) {
  471. trace_events.new_event()
  472. .name("thread_name")
  473. .ph('M')
  474. .pid(pid)
  475. .tid(to_tid(device))
  476. .arg("name", device.to_string_logical());
  477. }
  478. }
  479. };
  480. void dump_chrome_timeline(std::string filename, Profiler::bundle_t result) {
  481. ChromeTimelineEventVisitor visitor{};
  482. visitor.process_events(result);
  483. visitor.name_threads(result.thread_dict);
  484. auto trace_events = std::move(visitor.trace_events);
  485. trace_events.metadata("localTime") =
  486. std::to_string(result.start_at.time_since_epoch().count());
  487. std::string json_repr = trace_events.to_string();
  488. mgb::debug::write_to_file(filename.c_str(), json_repr);
  489. }
  490. } // namespace mgb::imperative::profiler