Skip to content

Commit 2f6fede

Browse files
committed
Allow perf to work without frame pointers
Signed-off-by: Pablo Galindo <pablogsal@gmail.com>
1 parent 2655369 commit 2f6fede

File tree

12 files changed

+668
-122
lines changed

12 files changed

+668
-122
lines changed

Include/cpython/sysmodule.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void);
1919
PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
2020
const void *code_addr,
2121
unsigned int code_size,
22-
const char *entry_name);
22+
PyCodeObject *entry_name);
2323
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);
2424
PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename);
2525
PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *);

Include/internal/pycore_ceval.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -103,16 +103,19 @@ typedef struct {
103103
unsigned int code_size, PyCodeObject* code);
104104
// Callback to free the trampoline state
105105
int (*free_state)(void* state);
106+
int (*copy_file)(const char* filename);
106107
} _PyPerf_Callbacks;
107108

108109
extern int _PyPerfTrampoline_SetCallbacks(_PyPerf_Callbacks *);
109110
extern void _PyPerfTrampoline_GetCallbacks(_PyPerf_Callbacks *);
111+
extern void* _PyPerfTrampoline_GetState(void);
110112
extern int _PyPerfTrampoline_Init(int activate);
111113
extern int _PyPerfTrampoline_Fini(void);
112114
extern int _PyIsPerfTrampolineActive(void);
113115
extern PyStatus _PyPerfTrampoline_AfterFork_Child(void);
114116
#ifdef PY_HAVE_PERF_TRAMPOLINE
115117
extern _PyPerf_Callbacks _Py_perfmap_callbacks;
118+
extern _PyPerf_Callbacks _Py_perfmap_jit_callbacks;
116119
#endif
117120

118121
static inline PyObject*

Include/internal/pycore_ceval_state.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ struct trampoline_api_st {
4343
void (*write_state)(void* state, const void *code_addr,
4444
unsigned int code_size, PyCodeObject* code);
4545
int (*free_state)(void* state);
46+
int (*copy_file)(const char* filename);
4647
void *state;
4748
};
4849
#endif

Lib/test/test_perf_profiler.py

Lines changed: 105 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import sysconfig
66
import os
77
import pathlib
8+
import shutil
89
from test import support
910
from test.support.script_helper import (
1011
make_script,
@@ -76,14 +77,27 @@ def baz():
7677
perf_file = pathlib.Path(f"/tmp/perf-{process.pid}.map")
7778
self.assertTrue(perf_file.exists())
7879
perf_file_contents = perf_file.read_text()
79-
perf_lines = perf_file_contents.splitlines();
80-
expected_symbols = [f"py::foo:{script}", f"py::bar:{script}", f"py::baz:{script}"]
80+
perf_lines = perf_file_contents.splitlines()
81+
expected_symbols = [
82+
f"py::foo:{script}",
83+
f"py::bar:{script}",
84+
f"py::baz:{script}",
85+
]
8186
for expected_symbol in expected_symbols:
82-
perf_line = next((line for line in perf_lines if expected_symbol in line), None)
83-
self.assertIsNotNone(perf_line, f"Could not find {expected_symbol} in perf file")
87+
perf_line = next(
88+
(line for line in perf_lines if expected_symbol in line), None
89+
)
90+
self.assertIsNotNone(
91+
perf_line, f"Could not find {expected_symbol} in perf file"
92+
)
8493
perf_addr = perf_line.split(" ")[0]
85-
self.assertFalse(perf_addr.startswith("0x"), "Address should not be prefixed with 0x")
86-
self.assertTrue(set(perf_addr).issubset(string.hexdigits), "Address should contain only hex characters")
94+
self.assertFalse(
95+
perf_addr.startswith("0x"), "Address should not be prefixed with 0x"
96+
)
97+
self.assertTrue(
98+
set(perf_addr).issubset(string.hexdigits),
99+
"Address should contain only hex characters",
100+
)
87101

88102
def test_trampoline_works_with_forks(self):
89103
code = """if 1:
@@ -212,7 +226,7 @@ def test_sys_api_get_status(self):
212226
assert_python_ok("-c", code)
213227

214228

215-
def is_unwinding_reliable():
229+
def is_unwinding_reliable_with_frame_pointers():
216230
cflags = sysconfig.get_config_var("PY_CORE_CFLAGS")
217231
if not cflags:
218232
return False
@@ -259,24 +273,49 @@ def perf_command_works():
259273
return True
260274

261275

262-
def run_perf(cwd, *args, **env_vars):
276+
def run_perf(cwd, *args, use_jit=False, **env_vars):
263277
if env_vars:
264278
env = os.environ.copy()
265279
env.update(env_vars)
266280
else:
267281
env = None
268282
output_file = cwd + "/perf_output.perf"
269-
base_cmd = ("perf", "record", "-g", "--call-graph=fp", "-o", output_file, "--")
283+
if not use_jit:
284+
base_cmd = ("perf", "record", "-g", "--call-graph=fp", "-o", output_file, "--")
285+
else:
286+
base_cmd = (
287+
"perf",
288+
"record",
289+
"-g",
290+
"--call-graph=dwarf,65528",
291+
"-F99",
292+
"-k1",
293+
"-o",
294+
output_file,
295+
"--",
296+
)
270297
proc = subprocess.run(
271298
base_cmd + args,
272299
stdout=subprocess.PIPE,
273300
stderr=subprocess.PIPE,
274301
env=env,
275302
)
276303
if proc.returncode:
277-
print(proc.stderr)
304+
print(proc.stderr, file=sys.stderr)
278305
raise ValueError(f"Perf failed with return code {proc.returncode}")
279306

307+
if use_jit:
308+
jit_output_file = cwd + "/jit_output.dump"
309+
command = ("perf", "inject", "-j", "-i", output_file, "-o", jit_output_file)
310+
proc = subprocess.run(
311+
command, stderr=subprocess.PIPE, stdout=subprocess.PIPE, env=env
312+
)
313+
if proc.returncode:
314+
print(proc.stderr)
315+
raise ValueError(f"Perf failed with return code {proc.returncode}")
316+
# Move jit_output_file into place as output_file (os.rename moves, it does not copy) so the "perf script" step below reads the injected data
317+
os.rename(jit_output_file, output_file)
318+
280319
base_cmd = ("perf", "script")
281320
proc = subprocess.run(
282321
("perf", "script", "-i", output_file),
@@ -290,20 +329,9 @@ def run_perf(cwd, *args, **env_vars):
290329
)
291330

292331

293-
@unittest.skipUnless(perf_command_works(), "perf command doesn't work")
294-
@unittest.skipUnless(is_unwinding_reliable(), "Unwinding is unreliable")
295-
class TestPerfProfiler(unittest.TestCase):
296-
def setUp(self):
297-
super().setUp()
298-
self.perf_files = set(pathlib.Path("/tmp/").glob("perf-*.map"))
299-
300-
def tearDown(self) -> None:
301-
super().tearDown()
302-
files_to_delete = (
303-
set(pathlib.Path("/tmp/").glob("perf-*.map")) - self.perf_files
304-
)
305-
for file in files_to_delete:
306-
file.unlink()
332+
class TestPerfProfilerMixin:
333+
def run_perf(self, script_dir, perf_mode, script):
334+
raise NotImplementedError()
307335

308336
def test_python_calls_appear_in_the_stack_if_perf_activated(self):
309337
with temp_dir() as script_dir:
@@ -322,14 +350,14 @@ def baz(n):
322350
baz(10000000)
323351
"""
324352
script = make_script(script_dir, "perftest", code)
325-
stdout, stderr = run_perf(script_dir, sys.executable, "-Xperf", script)
353+
stdout, stderr = self.run_perf(script_dir, script)
326354
self.assertEqual(stderr, "")
327355

328356
self.assertIn(f"py::foo:{script}", stdout)
329357
self.assertIn(f"py::bar:{script}", stdout)
330358
self.assertIn(f"py::baz:{script}", stdout)
331359

332-
def test_python_calls_do_not_appear_in_the_stack_if_perf_activated(self):
360+
def test_python_calls_do_not_appear_in_the_stack_if_perf_deactivated(self):
333361
with temp_dir() as script_dir:
334362
code = """if 1:
335363
def foo(n):
@@ -346,7 +374,9 @@ def baz(n):
346374
baz(10000000)
347375
"""
348376
script = make_script(script_dir, "perftest", code)
349-
stdout, stderr = run_perf(script_dir, sys.executable, script)
377+
stdout, stderr = self.run_perf(
378+
script_dir, script, activate_trampoline=False
379+
)
350380
self.assertEqual(stderr, "")
351381

352382
self.assertNotIn(f"py::foo:{script}", stdout)
@@ -423,12 +453,56 @@ def compile_trampolines_for_all_functions():
423453
# identical in both the parent and child perf-map files.
424454
perf_file_lines = perf_file_contents.split("\n")
425455
for line in perf_file_lines:
426-
if (
427-
f"py::foo_fork:{script}" in line
428-
or f"py::bar_fork:{script}" in line
429-
):
456+
if f"py::foo_fork:{script}" in line or f"py::bar_fork:{script}" in line:
430457
self.assertIn(line, child_perf_file_contents)
431458

432459

460+
@unittest.skipUnless(perf_command_works(), "perf command doesn't work")
461+
@unittest.skipUnless(
462+
is_unwinding_reliable_with_frame_pointers(),
463+
"Unwinding is unreliable with frame pointers",
464+
)
465+
class TestPerfProfiler(unittest.TestCase, TestPerfProfilerMixin):
466+
def run_perf(self, script_dir, script, activate_trampoline=True):
467+
if activate_trampoline:
468+
return run_perf(script_dir, sys.executable, "-Xperf", script)
469+
return run_perf(script_dir, sys.executable, script)
470+
471+
def setUp(self):
472+
super().setUp()
473+
self.perf_files = set(pathlib.Path("/tmp/").glob("perf-*.map"))
474+
475+
def tearDown(self) -> None:
476+
super().tearDown()
477+
files_to_delete = (
478+
set(pathlib.Path("/tmp/").glob("perf-*.map")) - self.perf_files
479+
)
480+
for file in files_to_delete:
481+
file.unlink()
482+
483+
484+
@unittest.skipUnless(perf_command_works(), "perf command doesn't work")
485+
class TestPerfProfilerWithDwarf(unittest.TestCase, TestPerfProfilerMixin):
486+
def run_perf(self, script_dir, script, activate_trampoline=True):
487+
if activate_trampoline:
488+
return run_perf(
489+
script_dir, sys.executable, "-Xperfjit", script, use_jit=True
490+
)
491+
return run_perf(script_dir, sys.executable, script, use_jit=True)
492+
493+
def setUp(self):
494+
super().setUp()
495+
self.perf_files = set(pathlib.Path("/tmp/").glob("jit*.dump"))
496+
self.perf_files |= set(pathlib.Path("/tmp/").glob("jitted-*.so"))
497+
498+
def tearDown(self) -> None:
499+
super().tearDown()
500+
files_to_delete = set(pathlib.Path("/tmp/").glob("jit*.dump"))
501+
files_to_delete |= set(pathlib.Path("/tmp/").glob("jitted-*.so"))
502+
files_to_delete = files_to_delete - self.perf_files
503+
for file in files_to_delete:
504+
file.unlink()
505+
506+
433507
if __name__ == "__main__":
434508
unittest.main()

Lib/test/test_perfmaps.py

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7,13 +7,22 @@
77
if sys.platform != 'linux':
88
raise unittest.SkipTest('Linux only')
99

10+
def entry1():
11+
pass
12+
13+
def entry2():
14+
pass
1015

1116
class TestPerfMapWriting(unittest.TestCase):
1217
def test_write_perf_map_entry(self):
13-
self.assertEqual(write_perf_map_entry(0x1234, 5678, "entry1"), 0)
14-
self.assertEqual(write_perf_map_entry(0x2345, 6789, "entry2"), 0)
15-
with open(f"/tmp/perf-{os.getpid()}.map") as f:
16-
perf_file_contents = f.read()
17-
self.assertIn("1234 162e entry1", perf_file_contents)
18-
self.assertIn("2345 1a85 entry2", perf_file_contents)
19-
perf_map_state_teardown()
18+
sys.activate_stack_trampoline("perf")
19+
try:
20+
self.assertEqual(write_perf_map_entry(0x1234, 5678, entry1.__code__), 0)
21+
self.assertEqual(write_perf_map_entry(0x2345, 6789, entry2.__code__), 0)
22+
with open(f"/tmp/perf-{os.getpid()}.map") as f:
23+
perf_file_contents = f.read()
24+
self.assertIn("1234 162e py::entry1", perf_file_contents)
25+
self.assertIn("2345 1a85 py::entry2", perf_file_contents)
26+
perf_map_state_teardown()
27+
finally:
28+
sys.deactivate_stack_trampoline()

Modules/_testinternalcapi.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -893,9 +893,9 @@ write_perf_map_entry(PyObject *self, PyObject *args)
893893
PyObject *code_addr_v;
894894
const void *code_addr;
895895
unsigned int code_size;
896-
const char *entry_name;
896+
PyCodeObject *entry_name;
897897

898-
if (!PyArg_ParseTuple(args, "OIs", &code_addr_v, &code_size, &entry_name))
898+
if (!PyArg_ParseTuple(args, "OIO", &code_addr_v, &code_size, &entry_name))
899899
return NULL;
900900
code_addr = PyLong_AsVoidPtr(code_addr_v);
901901
if (code_addr == NULL) {

Python/asm_trampoline.S

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,10 @@
99
# }
1010
_Py_trampoline_func_start:
1111
#ifdef __x86_64__
12-
sub $8, %rsp
12+
pushq %rbp
13+
movq %rsp, %rbp
1314
call *%rcx
14-
add $8, %rsp
15+
popq %rbp
1516
ret
1617
#endif // __x86_64__
1718
#if defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)

Python/initconfig.c

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1697,6 +1697,20 @@ config_init_perf_profiling(PyConfig *config)
16971697
if (xoption) {
16981698
config->perf_profiling = 1;
16991699
}
1700+
env = config_get_env(config, "PYTHONPERFJITSUPPORT");
1701+
if (env) {
1702+
if (_Py_str_to_int(env, &active) != 0) {
1703+
active = 0;
1704+
}
1705+
if (active) {
1706+
config->perf_profiling = 2;
1707+
}
1708+
}
1709+
xoption = config_get_xoption(config, L"perfjit");
1710+
if (xoption) {
1711+
config->perf_profiling = 2;
1712+
}
1713+
17001714
return _PyStatus_OK();
17011715

17021716
}

Python/lel.c

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
typedef void* (py_evaluator)(void* ts, void* f, int t);
2+
3+
void*
4+
trampoline(void *ts, void *f,
5+
int throwflag, py_evaluator evaluator)
6+
{
7+
return evaluator(ts, f, throwflag);
8+
}

0 commit comments

Comments
 (0)