Skip to content

Commit 6744754

Browse files
authored
Add time range duration display (#41029)
* no * fix bugs * fix doc according to review * fix api doc format * fix api doc according to review * fix bug and add unit test * fix record event bug * optimize chrome tracing display * fix bug * add comment * add unit test * fix a bug * fix * fix * fix format
1 parent b9da48d commit 6744754

File tree

6 files changed

+166
-69
lines changed

6 files changed

+166
-69
lines changed

paddle/fluid/platform/profiler.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,9 @@ void RecordEvent::OriginalConstruct(const std::string &name,
162162
void RecordEvent::End() {
163163
#ifndef _WIN32
164164
#ifdef PADDLE_WITH_CUDA
165-
if (g_enable_nvprof_hook && is_pushed_ && is_enabled_) {
165+
if (g_enable_nvprof_hook && is_pushed_) {
166166
dynload::nvtxRangePop();
167+
is_pushed_ = false;
167168
}
168169
#endif
169170
#endif

paddle/fluid/platform/profiler/chrometracing_logger.cc

Lines changed: 119 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ limitations under the License. */
1414

1515
#include <cstdio>
1616
#include <ctime>
17+
#include <limits>
1718

1819
#include "glog/logging.h"
1920

@@ -75,6 +76,26 @@ void ChromeTracingLogger::LogNodeTrees(const NodeTrees& node_trees) {
7576
// log all nodes except root node, root node is a helper node.
7677
const std::map<uint64_t, std::vector<HostTraceEventNode*>>
7778
thread2host_event_nodes = node_trees.Traverse(true);
79+
// find the earliest time in current timeline
80+
start_time_ = std::numeric_limits<uint64_t>::max();
81+
for (auto it = thread2host_event_nodes.begin();
82+
it != thread2host_event_nodes.end(); ++it) {
83+
if (it->second.begin() + 1 != it->second.end()) {
84+
if ((*(it->second.begin() + 1))->StartNs() < start_time_) {
85+
start_time_ = (*(it->second.begin() + 1))->StartNs();
86+
}
87+
} else {
88+
auto runtimenode =
89+
(*(it->second.begin()))->GetRuntimeTraceEventNodes().begin();
90+
if (runtimenode !=
91+
(*(it->second.begin()))->GetRuntimeTraceEventNodes().end()) {
92+
if ((*runtimenode)->StartNs() < start_time_) {
93+
start_time_ = (*runtimenode)->StartNs();
94+
}
95+
}
96+
}
97+
}
98+
7899
for (auto it = thread2host_event_nodes.begin();
79100
it != thread2host_event_nodes.end(); ++it) {
80101
for (auto hostnode = it->second.begin(); hostnode != it->second.end();
@@ -102,6 +123,13 @@ void ChromeTracingLogger::LogHostTraceEventNode(
102123
if (!output_file_stream_) {
103124
return;
104125
}
126+
std::string dur_display;
127+
float dur = nsToMsFloat(host_node.Duration());
128+
if (dur > 1.0) {
129+
dur_display = string_format(std::string("%.3f ms"), dur);
130+
} else {
131+
dur_display = string_format(std::string("%.3f us"), dur * 1000);
132+
}
105133
switch (host_node.Type()) {
106134
case TracerEventType::ProfileStep:
107135
case TracerEventType::Forward:
@@ -110,42 +138,50 @@ void ChromeTracingLogger::LogHostTraceEventNode(
110138
case TracerEventType::Optimization:
111139
case TracerEventType::PythonOp:
112140
case TracerEventType::PythonUserDefined:
141+
// cname value comes from tracing.js reservedColorsByName variable
142+
113143
output_file_stream_ << string_format(
114144
std::string(
115145
R"JSON(
116146
{
117-
"name": "%s", "pid": %lld, "tid": "%lld(Python)",
118-
"ts": %lld, "dur": %lld,
147+
"name": "%s[%s]", "pid": %lld, "tid": "%lld(Python)",
148+
"ts": %lld, "dur": %.3f,
119149
"ph": "X", "cat": "%s",
150+
"cname": "thread_state_runnable",
120151
"args": {
121-
"start_ns": %lld,
122-
"end_ns": %lld
152+
"start_time": "%.3f us",
153+
"end_time": "%.3f us"
123154
}
124155
},
125156
)JSON"),
126-
host_node.Name().c_str(), host_node.ProcessId(), host_node.ThreadId(),
127-
nsToUs(host_node.StartNs()), nsToUs(host_node.Duration()),
157+
host_node.Name().c_str(), dur_display.c_str(), host_node.ProcessId(),
158+
host_node.ThreadId(), nsToUs(host_node.StartNs()),
159+
nsToUsFloat(host_node.Duration()),
128160
categary_name_[static_cast<int>(host_node.Type())],
129-
host_node.StartNs(), host_node.EndNs());
161+
nsToUsFloat(host_node.StartNs(), start_time_),
162+
nsToUsFloat(host_node.EndNs(), start_time_));
130163
break;
131164
default:
132165
output_file_stream_ << string_format(
133166
std::string(
134167
R"JSON(
135168
{
136-
"name": "%s", "pid": %lld, "tid": "%lld(C++)",
137-
"ts": %lld, "dur": %lld,
169+
"name": "%s[%s]", "pid": %lld, "tid": "%lld(C++)",
170+
"ts": %lld, "dur": %.3f,
138171
"ph": "X", "cat": "%s",
172+
"cname": "thread_state_runnable",
139173
"args": {
140-
"start_ns": %lld,
141-
"end_ns": %lld
174+
"start_time": "%.3f us",
175+
"end_time": "%.3f us"
142176
}
143177
},
144178
)JSON"),
145-
host_node.Name().c_str(), host_node.ProcessId(), host_node.ThreadId(),
146-
nsToUs(host_node.StartNs()), nsToUs(host_node.Duration()),
179+
host_node.Name().c_str(), dur_display.c_str(), host_node.ProcessId(),
180+
host_node.ThreadId(), nsToUs(host_node.StartNs()),
181+
nsToUsFloat(host_node.Duration()),
147182
categary_name_[static_cast<int>(host_node.Type())],
148-
host_node.StartNs(), host_node.EndNs());
183+
nsToUsFloat(host_node.StartNs(), start_time_),
184+
nsToUsFloat(host_node.EndNs(), start_time_));
149185
break;
150186
}
151187

@@ -157,26 +193,35 @@ void ChromeTracingLogger::LogRuntimeTraceEventNode(
157193
if (!output_file_stream_) {
158194
return;
159195
}
196+
float dur = nsToMsFloat(runtime_node.Duration());
197+
std::string dur_display;
198+
if (dur > 1.0) {
199+
dur_display = string_format(std::string("%.3f ms"), dur);
200+
} else {
201+
dur_display = string_format(std::string("%.3f us"), dur * 1000);
202+
}
160203
output_file_stream_ << string_format(
161204
std::string(
162205
R"JSON(
163206
{
164-
"name": "%s", "pid": %lld, "tid": "%lld(C++)",
165-
"ts": %lld, "dur": %lld,
207+
"name": "%s[%s]", "pid": %lld, "tid": "%lld(C++)",
208+
"ts": %lld, "dur": %.3f,
166209
"ph": "X", "cat": "%s",
210+
"cname": "thread_state_running",
167211
"args": {
168212
"correlation id": %d,
169-
"start_ns": %lld,
170-
"end_ns": %lld
213+
"start_time": "%.3f us",
214+
"end_time": "%.3f us"
171215
}
172216
},
173217
)JSON"),
174-
runtime_node.Name().c_str(), runtime_node.ProcessId(),
175-
runtime_node.ThreadId(), nsToUs(runtime_node.StartNs()),
176-
nsToUs(runtime_node.Duration()),
218+
runtime_node.Name().c_str(), dur_display.c_str(),
219+
runtime_node.ProcessId(), runtime_node.ThreadId(),
220+
nsToUs(runtime_node.StartNs()), nsToUsFloat(runtime_node.Duration()),
177221
categary_name_[static_cast<int>(runtime_node.Type())],
178-
runtime_node.CorrelationId(), runtime_node.StartNs(),
179-
runtime_node.EndNs());
222+
runtime_node.CorrelationId(),
223+
nsToUsFloat(runtime_node.StartNs(), start_time_),
224+
nsToUsFloat(runtime_node.EndNs(), start_time_));
180225
pid_tid_set_.insert({runtime_node.ProcessId(), runtime_node.ThreadId()});
181226

182227
output_file_stream_ << string_format(
@@ -199,6 +244,7 @@ void ChromeTracingLogger::LogDeviceTraceEventNode(
199244
if (!output_file_stream_) {
200245
return;
201246
}
247+
202248
switch (device_node.Type()) {
203249
case TracerEventType::Kernel:
204250
HandleTypeKernel(device_node);
@@ -265,17 +311,24 @@ void ChromeTracingLogger::HandleTypeKernel(
265311
kernel_info.block_x, kernel_info.block_y, kernel_info.block_z,
266312
blocks_per_sm);
267313
#endif
268-
314+
float dur = nsToMsFloat(device_node.Duration());
315+
std::string dur_display;
316+
if (dur > 1.0) {
317+
dur_display = string_format(std::string("%.3f ms"), dur);
318+
} else {
319+
dur_display = string_format(std::string("%.3f us"), dur * 1000);
320+
}
269321
output_file_stream_ << string_format(
270322
std::string(
271323
R"JSON(
272324
{
273-
"name": "%s", "pid": %lld, "tid": %lld,
274-
"ts": %lld, "dur": %lld,
325+
"name": "%s[%s]", "pid": %lld, "tid": %lld,
326+
"ts": %lld, "dur": %.3f,
275327
"ph": "X", "cat": "%s",
328+
"cname": "rail_animation",
276329
"args": {
277-
"start_ns": %lld,
278-
"end_ns": %lld,
330+
"start_time": "%.3f us",
331+
"end_time": "%.3f us",
279332
"device": %d, "context": %d,
280333
"stream": %d, "correlation id": %d,
281334
"registers per thread": %d,
@@ -284,15 +337,16 @@ void ChromeTracingLogger::HandleTypeKernel(
284337
"warps per SM": %f,
285338
"grid": [%d, %d, %d],
286339
"block": [%d, %d, %d],
287-
"theoretical achieved occupancy %%": %f
340+
"theoretical achieved occupancy %%": %.3f
288341
}
289342
},
290343
)JSON"),
291-
device_node.Name().c_str(), device_node.DeviceId(),
344+
device_node.Name().c_str(), dur_display.c_str(), device_node.DeviceId(),
292345
device_node.StreamId(), nsToUs(device_node.StartNs()),
293-
nsToUs(device_node.Duration()),
346+
nsToUsFloat(device_node.Duration()),
294347
categary_name_[static_cast<int>(device_node.Type())],
295-
device_node.StartNs(), device_node.EndNs(), device_node.DeviceId(),
348+
nsToUsFloat(device_node.StartNs(), start_time_),
349+
nsToUsFloat(device_node.EndNs(), start_time_), device_node.DeviceId(),
296350
device_node.ContextId(), device_node.StreamId(),
297351
device_node.CorrelationId(), kernel_info.registers_per_thread,
298352
kernel_info.static_shared_memory + kernel_info.dynamic_shared_memory,
@@ -308,53 +362,71 @@ void ChromeTracingLogger::HandleTypeMemcpy(
308362
if (device_node.Duration() > 0) {
309363
memory_bandwidth = memcpy_info.num_bytes * 1.0 / device_node.Duration();
310364
}
365+
float dur = nsToMsFloat(device_node.Duration());
366+
std::string dur_display;
367+
if (dur > 1.0) {
368+
dur_display = string_format(std::string("%.3f ms"), dur);
369+
} else {
370+
dur_display = string_format(std::string("%.3f us"), dur * 1000);
371+
}
311372
output_file_stream_ << string_format(
312373
std::string(
313374
R"JSON(
314375
{
315-
"name": "%s", "pid": %lld, "tid": %lld,
316-
"ts": %lld, "dur": %lld,
376+
"name": "%s[%s]", "pid": %lld, "tid": %lld,
377+
"ts": %lld, "dur": %.3f,
317378
"ph": "X", "cat": "%s",
379+
"cname": "rail_animation",
318380
"args": {
319-
"start_ns": %lld,
320-
"end_ns": %lld,
381+
"start_time": "%.3f us",
382+
"end_time": "%.3f us",
321383
"stream": %d, "correlation id": %d,
322-
"bytes": %d, "memory bandwidth (GB/s)": %f
384+
"bytes": %d, "memory bandwidth (GB/s)": %.3f
323385
}
324386
},
325387
)JSON"),
326-
device_node.Name().c_str(), device_node.DeviceId(),
388+
device_node.Name().c_str(), dur_display.c_str(), device_node.DeviceId(),
327389
device_node.StreamId(), nsToUs(device_node.StartNs()),
328-
nsToUs(device_node.Duration()),
390+
nsToUsFloat(device_node.Duration()),
329391
categary_name_[static_cast<int>(device_node.Type())],
330-
device_node.StartNs(), device_node.EndNs(), device_node.StreamId(),
392+
nsToUsFloat(device_node.StartNs(), start_time_),
393+
nsToUsFloat(device_node.EndNs(), start_time_), device_node.StreamId(),
331394
device_node.CorrelationId(), memcpy_info.num_bytes, memory_bandwidth);
332395
}
333396

334397
void ChromeTracingLogger::HandleTypeMemset(
335398
const DeviceTraceEventNode& device_node) {
336399
MemsetEventInfo memset_info = device_node.MemsetInfo();
400+
float dur = nsToMsFloat(device_node.Duration());
401+
std::string dur_display;
402+
if (dur > 1.0) {
403+
dur_display = string_format(std::string("%.3f ms"), dur);
404+
} else {
405+
dur_display = string_format(std::string("%.3f us"), dur * 1000);
406+
}
337407
output_file_stream_ << string_format(
338408
std::string(
339409
R"JSON(
340410
{
341-
"name": "%s", "pid": %lld, "tid": %lld,
342-
"ts": %lld, "dur": %lld,
411+
"name": "%s[%s]", "pid": %lld, "tid": %lld,
412+
"ts": %lld, "dur": %.3f,
343413
"ph": "X", "cat": "%s",
414+
"cname": "rail_animation",
344415
"args": {
345-
"start_ns": %lld,
346-
"end_ns": %lld,
416+
"start_time": "%.3f us",
417+
"end_time": "%.3f us",
347418
"device": %d, "context": %d,
348419
"stream": %d, "correlation id": %d,
349420
"bytes": %d, "value": %d
350421
}
351422
},
352423
)JSON"),
353-
device_node.Name().c_str(), device_node.DeviceId(),
424+
device_node.Name().c_str(), dur_display.c_str(), device_node.DeviceId(),
354425
device_node.StreamId(), nsToUs(device_node.StartNs()),
355-
nsToUs(device_node.Duration()),
426+
nsToUsFloat(device_node.Duration()),
356427
categary_name_[static_cast<int>(device_node.Type())],
357-
device_node.StartNs(), device_node.EndNs(), device_node.DeviceId(),
428+
nsToUsFloat(device_node.StartNs(), start_time_),
429+
nsToUsFloat(device_node.EndNs(), start_time_), device_node.DeviceId(),
358430
device_node.ContextId(), device_node.StreamId(),
359431
device_node.CorrelationId(), memset_info.num_bytes, memset_info.value);
360432
}

paddle/fluid/platform/profiler/chrometracing_logger.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ class ChromeTracingLogger : public BaseLogger {
5050
static const char* categary_name_[];
5151
std::set<std::pair<uint64_t, uint64_t>> pid_tid_set_;
5252
std::set<std::pair<uint64_t, uint64_t>> deviceid_streamid_set_;
53+
uint64_t start_time_;
5354
};
5455

5556
} // namespace platform

paddle/fluid/platform/profiler/utils.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,16 @@ static std::string GetStringFormatLocalTime() {
4444
return std::string(buf);
4545
}
4646

47-
static int64_t nsToUs(int64_t ns) { return ns / 1000; }
47+
static int64_t nsToUs(uint64_t end_ns, uint64_t start_ns = 0) {
48+
return (end_ns - start_ns) / 1000;
49+
}
50+
51+
static float nsToUsFloat(uint64_t end_ns, uint64_t start_ns = 0) {
52+
return static_cast<float>(end_ns - start_ns) / 1000;
53+
}
54+
static float nsToMsFloat(uint64_t end_ns, uint64_t start_ns = 0) {
55+
return static_cast<float>(end_ns - start_ns) / 1000 / 1000;
56+
}
4857

4958
#ifdef PADDLE_WITH_CUPTI
5059
float CalculateEstOccupancy(uint32_t deviceId, uint16_t registersPerThread,

python/paddle/fluid/tests/unittests/test_newprofiler.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,16 @@ def my_sheduler1(num_step):
128128
result = profiler.utils.load_profiler_result('./test_profiler_pb.pb')
129129

130130

131+
class TestNvprof(unittest.TestCase):
132+
def test_nvprof(self):
133+
for i in range(10):
134+
paddle.fluid.profiler._nvprof_range(i, 10, 20)
135+
x_value = np.random.randn(2, 3, 3)
136+
x = paddle.to_tensor(
137+
x_value, stop_gradient=False, place=paddle.CPUPlace())
138+
y = x / 2.0
139+
140+
131141
class RandomDataset(Dataset):
132142
def __init__(self, num_samples):
133143
self.num_samples = num_samples

0 commit comments

Comments
 (0)