@@ -14,6 +14,7 @@ limitations under the License. */
1414
1515#include < cstdio>
1616#include < ctime>
17+ #include < limits>
1718
1819#include " glog/logging.h"
1920
@@ -75,6 +76,26 @@ void ChromeTracingLogger::LogNodeTrees(const NodeTrees& node_trees) {
7576 // log all nodes except root node, root node is a helper node.
7677 const std::map<uint64_t , std::vector<HostTraceEventNode*>>
7778 thread2host_event_nodes = node_trees.Traverse (true );
79+ // find the earliest time in current timeline
80+ start_time_ = std::numeric_limits<uint64_t >::max ();
81+ for (auto it = thread2host_event_nodes.begin ();
82+ it != thread2host_event_nodes.end (); ++it) {
83+ if (it->second .begin () + 1 != it->second .end ()) {
84+ if ((*(it->second .begin () + 1 ))->StartNs () < start_time_) {
85+ start_time_ = (*(it->second .begin () + 1 ))->StartNs ();
86+ }
87+ } else {
88+ auto runtimenode =
89+ (*(it->second .begin ()))->GetRuntimeTraceEventNodes ().begin ();
90+ if (runtimenode !=
91+ (*(it->second .begin ()))->GetRuntimeTraceEventNodes ().end ()) {
92+ if ((*runtimenode)->StartNs () < start_time_) {
93+ start_time_ = (*runtimenode)->StartNs ();
94+ }
95+ }
96+ }
97+ }
98+
7899 for (auto it = thread2host_event_nodes.begin ();
79100 it != thread2host_event_nodes.end (); ++it) {
80101 for (auto hostnode = it->second .begin (); hostnode != it->second .end ();
@@ -102,6 +123,13 @@ void ChromeTracingLogger::LogHostTraceEventNode(
102123 if (!output_file_stream_) {
103124 return ;
104125 }
126+ std::string dur_display;
127+ float dur = nsToMsFloat (host_node.Duration ());
128+ if (dur > 1.0 ) {
129+ dur_display = string_format (std::string (" %.3f ms" ), dur);
130+ } else {
131+ dur_display = string_format (std::string (" %.3f us" ), dur * 1000 );
132+ }
105133 switch (host_node.Type ()) {
106134 case TracerEventType::ProfileStep:
107135 case TracerEventType::Forward:
@@ -110,42 +138,50 @@ void ChromeTracingLogger::LogHostTraceEventNode(
110138 case TracerEventType::Optimization:
111139 case TracerEventType::PythonOp:
112140 case TracerEventType::PythonUserDefined:
141+ // cname value comes from tracing.js reservedColorsByName variable
142+
113143 output_file_stream_ << string_format (
114144 std::string (
115145 R"JSON(
116146 {
117- "name": "%s", "pid": %lld, "tid": "%lld(Python)",
118- "ts": %lld, "dur": %lld ,
147+ "name": "%s[%s] ", "pid": %lld, "tid": "%lld(Python)",
148+ "ts": %lld, "dur": %.3f ,
119149 "ph": "X", "cat": "%s",
150+ "cname": "thread_state_runnable",
120151 "args": {
121- "start_ns ": %lld ,
122- "end_ns ": %lld
152+ "start_time ": "%.3f us" ,
153+ "end_time ": "%.3f us"
123154 }
124155 },
125156 )JSON" ),
126- host_node.Name ().c_str (), host_node.ProcessId (), host_node.ThreadId (),
127- nsToUs (host_node.StartNs ()), nsToUs (host_node.Duration ()),
157+ host_node.Name ().c_str (), dur_display.c_str (), host_node.ProcessId (),
158+ host_node.ThreadId (), nsToUs (host_node.StartNs ()),
159+ nsToUsFloat (host_node.Duration ()),
128160 categary_name_[static_cast <int >(host_node.Type ())],
129- host_node.StartNs (), host_node.EndNs ());
161+ nsToUsFloat (host_node.StartNs (), start_time_),
162+ nsToUsFloat (host_node.EndNs (), start_time_));
130163 break ;
131164 default :
132165 output_file_stream_ << string_format (
133166 std::string (
134167 R"JSON(
135168 {
136- "name": "%s", "pid": %lld, "tid": "%lld(C++)",
137- "ts": %lld, "dur": %lld ,
169+ "name": "%s[%s] ", "pid": %lld, "tid": "%lld(C++)",
170+ "ts": %lld, "dur": %.3f ,
138171 "ph": "X", "cat": "%s",
172+ "cname": "thread_state_runnable",
139173 "args": {
140- "start_ns ": %lld ,
141- "end_ns ": %lld
174+ "start_time ": "%.3f us" ,
175+ "end_time ": "%.3f us"
142176 }
143177 },
144178 )JSON" ),
145- host_node.Name ().c_str (), host_node.ProcessId (), host_node.ThreadId (),
146- nsToUs (host_node.StartNs ()), nsToUs (host_node.Duration ()),
179+ host_node.Name ().c_str (), dur_display.c_str (), host_node.ProcessId (),
180+ host_node.ThreadId (), nsToUs (host_node.StartNs ()),
181+ nsToUsFloat (host_node.Duration ()),
147182 categary_name_[static_cast <int >(host_node.Type ())],
148- host_node.StartNs (), host_node.EndNs ());
183+ nsToUsFloat (host_node.StartNs (), start_time_),
184+ nsToUsFloat (host_node.EndNs (), start_time_));
149185 break ;
150186 }
151187
@@ -157,26 +193,35 @@ void ChromeTracingLogger::LogRuntimeTraceEventNode(
157193 if (!output_file_stream_) {
158194 return ;
159195 }
196+ float dur = nsToMsFloat (runtime_node.Duration ());
197+ std::string dur_display;
198+ if (dur > 1.0 ) {
199+ dur_display = string_format (std::string (" %.3f ms" ), dur);
200+ } else {
201+ dur_display = string_format (std::string (" %.3f us" ), dur * 1000 );
202+ }
160203 output_file_stream_ << string_format (
161204 std::string (
162205 R"JSON(
163206 {
164- "name": "%s", "pid": %lld, "tid": "%lld(C++)",
165- "ts": %lld, "dur": %lld ,
207+ "name": "%s[%s] ", "pid": %lld, "tid": "%lld(C++)",
208+ "ts": %lld, "dur": %.3f ,
166209 "ph": "X", "cat": "%s",
210+ "cname": "thread_state_running",
167211 "args": {
168212 "correlation id": %d,
169- "start_ns ": %lld ,
170- "end_ns ": %lld
213+ "start_time ": "%.3f us" ,
214+ "end_time ": "%.3f us"
171215 }
172216 },
173217 )JSON" ),
174- runtime_node.Name ().c_str (), runtime_node. ProcessId (),
175- runtime_node.ThreadId (), nsToUs ( runtime_node.StartNs () ),
176- nsToUs (runtime_node.Duration ()),
218+ runtime_node.Name ().c_str (), dur_display. c_str (),
219+ runtime_node.ProcessId (), runtime_node.ThreadId ( ),
220+ nsToUs (runtime_node.StartNs ()), nsToUsFloat (runtime_node. Duration ()),
177221 categary_name_[static_cast <int >(runtime_node.Type ())],
178- runtime_node.CorrelationId (), runtime_node.StartNs (),
179- runtime_node.EndNs ());
222+ runtime_node.CorrelationId (),
223+ nsToUsFloat (runtime_node.StartNs (), start_time_),
224+ nsToUsFloat (runtime_node.EndNs (), start_time_));
180225 pid_tid_set_.insert ({runtime_node.ProcessId (), runtime_node.ThreadId ()});
181226
182227 output_file_stream_ << string_format (
@@ -199,6 +244,7 @@ void ChromeTracingLogger::LogDeviceTraceEventNode(
199244 if (!output_file_stream_) {
200245 return ;
201246 }
247+
202248 switch (device_node.Type ()) {
203249 case TracerEventType::Kernel:
204250 HandleTypeKernel (device_node);
@@ -265,17 +311,24 @@ void ChromeTracingLogger::HandleTypeKernel(
265311 kernel_info.block_x , kernel_info.block_y , kernel_info.block_z ,
266312 blocks_per_sm);
267313#endif
268-
314+ float dur = nsToMsFloat (device_node.Duration ());
315+ std::string dur_display;
316+ if (dur > 1.0 ) {
317+ dur_display = string_format (std::string (" %.3f ms" ), dur);
318+ } else {
319+ dur_display = string_format (std::string (" %.3f us" ), dur * 1000 );
320+ }
269321 output_file_stream_ << string_format (
270322 std::string (
271323 R"JSON(
272324 {
273- "name": "%s", "pid": %lld, "tid": %lld,
274- "ts": %lld, "dur": %lld ,
325+ "name": "%s[%s] ", "pid": %lld, "tid": %lld,
326+ "ts": %lld, "dur": %.3f ,
275327 "ph": "X", "cat": "%s",
328+ "cname": "rail_animation",
276329 "args": {
277- "start_ns ": %lld ,
278- "end_ns ": %lld ,
330+ "start_time ": "%.3f us" ,
331+ "end_time ": "%.3f us" ,
279332 "device": %d, "context": %d,
280333 "stream": %d, "correlation id": %d,
281334 "registers per thread": %d,
@@ -284,15 +337,16 @@ void ChromeTracingLogger::HandleTypeKernel(
284337 "warps per SM": %f,
285338 "grid": [%d, %d, %d],
286339 "block": [%d, %d, %d],
287- "theoretical achieved occupancy %%": %f
340+ "theoretical achieved occupancy %%": %.3f
288341 }
289342 },
290343 )JSON" ),
291- device_node.Name ().c_str (), device_node.DeviceId (),
344+ device_node.Name ().c_str (), dur_display. c_str (), device_node.DeviceId (),
292345 device_node.StreamId (), nsToUs (device_node.StartNs ()),
293- nsToUs (device_node.Duration ()),
346+ nsToUsFloat (device_node.Duration ()),
294347 categary_name_[static_cast <int >(device_node.Type ())],
295- device_node.StartNs (), device_node.EndNs (), device_node.DeviceId (),
348+ nsToUsFloat (device_node.StartNs (), start_time_),
349+ nsToUsFloat (device_node.EndNs (), start_time_), device_node.DeviceId (),
296350 device_node.ContextId (), device_node.StreamId (),
297351 device_node.CorrelationId (), kernel_info.registers_per_thread ,
298352 kernel_info.static_shared_memory + kernel_info.dynamic_shared_memory ,
@@ -308,53 +362,71 @@ void ChromeTracingLogger::HandleTypeMemcpy(
308362 if (device_node.Duration () > 0 ) {
309363 memory_bandwidth = memcpy_info.num_bytes * 1.0 / device_node.Duration ();
310364 }
365+ float dur = nsToMsFloat (device_node.Duration ());
366+ std::string dur_display;
367+ if (dur > 1.0 ) {
368+ dur_display = string_format (std::string (" %.3f ms" ), dur);
369+ } else {
370+ dur_display = string_format (std::string (" %.3f us" ), dur * 1000 );
371+ }
311372 output_file_stream_ << string_format (
312373 std::string (
313374 R"JSON(
314375 {
315- "name": "%s", "pid": %lld, "tid": %lld,
316- "ts": %lld, "dur": %lld ,
376+ "name": "%s[%s] ", "pid": %lld, "tid": %lld,
377+ "ts": %lld, "dur": %.3f ,
317378 "ph": "X", "cat": "%s",
379+ "cname": "rail_animation",
318380 "args": {
319- "start_ns ": %lld ,
320- "end_ns ": %lld ,
381+ "start_time ": "%.3f us" ,
382+ "end_time ": "%.3f us" ,
321383 "stream": %d, "correlation id": %d,
322- "bytes": %d, "memory bandwidth (GB/s)": %f
384+ "bytes": %d, "memory bandwidth (GB/s)": %.3f
323385 }
324386 },
325387 )JSON" ),
326- device_node.Name ().c_str (), device_node.DeviceId (),
388+ device_node.Name ().c_str (), dur_display. c_str (), device_node.DeviceId (),
327389 device_node.StreamId (), nsToUs (device_node.StartNs ()),
328- nsToUs (device_node.Duration ()),
390+ nsToUsFloat (device_node.Duration ()),
329391 categary_name_[static_cast <int >(device_node.Type ())],
330- device_node.StartNs (), device_node.EndNs (), device_node.StreamId (),
392+ nsToUsFloat (device_node.StartNs (), start_time_),
393+ nsToUsFloat (device_node.EndNs (), start_time_), device_node.StreamId (),
331394 device_node.CorrelationId (), memcpy_info.num_bytes , memory_bandwidth);
332395}
333396
334397void ChromeTracingLogger::HandleTypeMemset (
335398 const DeviceTraceEventNode& device_node) {
336399 MemsetEventInfo memset_info = device_node.MemsetInfo ();
400+ float dur = nsToMsFloat (device_node.Duration ());
401+ std::string dur_display;
402+ if (dur > 1.0 ) {
403+ dur_display = string_format (std::string (" %.3f ms" ), dur);
404+ } else {
405+ dur_display = string_format (std::string (" %.3f us" ), dur * 1000 );
406+ }
337407 output_file_stream_ << string_format (
338408 std::string (
339409 R"JSON(
340410 {
341- "name": "%s", "pid": %lld, "tid": %lld,
342- "ts": %lld, "dur": %lld ,
411+ "name": "%s[%s] ", "pid": %lld, "tid": %lld,
412+ "ts": %lld, "dur": %.3f ,
343413 "ph": "X", "cat": "%s",
414+ "cname": "rail_animation",
344415 "args": {
345- "start_ns ": %lld ,
346- "end_ns ": %lld ,
416+ "start_time ": "%.3f us" ,
417+ "end_time ": "%.3f us" ,
347418 "device": %d, "context": %d,
348419 "stream": %d, "correlation id": %d,
349420 "bytes": %d, "value": %d
350421 }
351422 },
352423 )JSON" ),
353- device_node.Name ().c_str (), device_node.DeviceId (),
424+ device_node.Name ().c_str (), dur_display. c_str (), device_node.DeviceId (),
354425 device_node.StreamId (), nsToUs (device_node.StartNs ()),
355- nsToUs (device_node.Duration ()),
426+ nsToUsFloat (device_node.Duration ()),
356427 categary_name_[static_cast <int >(device_node.Type ())],
357- device_node.StartNs (), device_node.EndNs (), device_node.DeviceId (),
428+ nsToUsFloat (device_node.StartNs (), start_time_),
429+ nsToUsFloat (device_node.EndNs (), start_time_), device_node.DeviceId (),
358430 device_node.ContextId (), device_node.StreamId (),
359431 device_node.CorrelationId (), memset_info.num_bytes , memset_info.value );
360432}
0 commit comments