Skip to content
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
``_lsprof`` module now uses ``_PyTime_GetPerfCounter`` private API for
default timer. It will use better timer depending on platform. Patch by
Inada Naoki.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically, _PyTime_GetPerfCounter() is the same than time.perf_counter(). Maybe mention time.perf_counter() which is public, rather than mentioning a private method? You may explain that time.perf_counter() is monotonic, whereas previously _lsprof uses time.time() which isn't monotonic.

"It will use better timer depending on platform." well, you can explain that the new implementation now has internally nanosecond resolution, whereas the old implementation of the default timer only has a ressolution of 1 microsecond.

I suggest:

The _lsprof module now supports internally nanosecond resolution for timing. It now uses time.perf_counter() rather than time.time() (use _PyTime_GetPerfCounter private API which has nanosecond resolution, rather than gettimeofday() which has microsecond resolution). Timings are no more impact by system clock updates, since perf_counter() is monotonic. Patch by Inada Naoki.

122 changes: 36 additions & 86 deletions Modules/_lsprof.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,62 +2,6 @@
#include "frameobject.h"
#include "rotatingtree.h"

/*** Selection of a high-precision timer ***/

#ifdef MS_WINDOWS

#include <windows.h>

static long long
hpTimer(void)
{
LARGE_INTEGER li;
QueryPerformanceCounter(&li);
return li.QuadPart;
}

static double
hpTimerUnit(void)
{
LARGE_INTEGER li;
if (QueryPerformanceFrequency(&li))
return 1.0 / li.QuadPart;
else
return 0.000001; /* unlikely */
}

#else /* !MS_WINDOWS */

#ifndef HAVE_GETTIMEOFDAY
#error "This module requires gettimeofday() on non-Windows platforms!"
#endif

#include <sys/resource.h>
#include <sys/times.h>

static long long
hpTimer(void)
{
struct timeval tv;
long long ret;
#ifdef GETTIMEOFDAY_NO_TZ
gettimeofday(&tv);
#else
gettimeofday(&tv, (struct timezone *)NULL);
#endif
ret = tv.tv_sec;
ret = ret * 1000000 + tv.tv_usec;
return ret;
}

static double
hpTimerUnit(void)
{
return 0.000001;
}

#endif /* MS_WINDOWS */

/************************************************************/
/* Written by Brett Rosen and Ted Czotter */

Expand All @@ -66,8 +10,8 @@ struct _ProfilerEntry;
/* represents a function called from another function */
typedef struct _ProfilerSubEntry {
rotating_node_t header;
long long tt;
long long it;
_PyTime_t tt;
_PyTime_t it;
long callcount;
long recursivecallcount;
long recursionLevel;
Expand All @@ -77,17 +21,17 @@ typedef struct _ProfilerSubEntry {
typedef struct _ProfilerEntry {
rotating_node_t header;
PyObject *userObj; /* PyCodeObject, or a descriptive str for builtins */
long long tt; /* total time in this entry */
long long it; /* inline time in this entry (not in subcalls) */
_PyTime_t tt; /* total time in this entry */
_PyTime_t it; /* inline time in this entry (not in subcalls) */
long callcount; /* how many times this was called */
long recursivecallcount; /* how many times called recursively */
long recursionLevel;
rotating_node_t *calls;
} ProfilerEntry;

typedef struct _ProfilerContext {
long long t0;
long long subt;
_PyTime_t t0;
_PyTime_t subt;
struct _ProfilerContext *previous;
ProfilerEntry *ctxEntry;
} ProfilerContext;
Expand All @@ -114,41 +58,46 @@ static PyTypeObject PyProfiler_Type;

/*** External Timers ***/

#define DOUBLE_TIMER_PRECISION 4294967296.0
static PyObject *empty_tuple;

static long long CallExternalTimer(ProfilerObject *pObj)
static _PyTime_t CallExternalTimer(ProfilerObject *pObj)
{
long long result;
PyObject *o = PyObject_Call(pObj->externalTimer, empty_tuple, NULL);
PyObject *o = _PyObject_CallNoArg(pObj->externalTimer);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's nice to see such micro-optimization going away thanks to FASTCALL ;-)

if (o == NULL) {
PyErr_WriteUnraisable(pObj->externalTimer);
return 0;
}

_PyTime_t result;
int err;
if (pObj->externalTimerUnit > 0.0) {
/* interpret the result as an integer that will be scaled
in profiler_getstats() */
result = PyLong_AsLongLong(o);
err = _PyTime_FromNanosecondsObject(&result, o);
}
else {
/* interpret the result as a double measured in seconds.
As the profiler works with long long internally
As the profiler works with _PyTime_t internally
we convert it to a large integer */
double val = PyFloat_AsDouble(o);
/* error handling delayed to the code below */
result = (long long) (val * DOUBLE_TIMER_PRECISION);
err = _PyTime_FromSecondsObject(&result, o, _PyTime_ROUND_FLOOR);
}
Py_DECREF(o);
if (PyErr_Occurred()) {
if (err < 0) {
PyErr_WriteUnraisable(pObj->externalTimer);
return 0;
}
return result;
}

#define CALL_TIMER(pObj) ((pObj)->externalTimer ? \
CallExternalTimer(pObj) : \
hpTimer())
static inline _PyTime_t
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for converting macros to static inline :-)

call_timer(ProfilerObject *pObj)
{
if (pObj->externalTimer != NULL) {
return CallExternalTimer(pObj);
}
else {
return _PyTime_GetPerfCounter();
}
}


/*** ProfilerObject ***/

Expand Down Expand Up @@ -332,14 +281,14 @@ initContext(ProfilerObject *pObj, ProfilerContext *self, ProfilerEntry *entry)
if (subentry)
++subentry->recursionLevel;
}
self->t0 = CALL_TIMER(pObj);
self->t0 = call_timer(pObj);
}

static void
Stop(ProfilerObject *pObj, ProfilerContext *self, ProfilerEntry *entry)
{
long long tt = CALL_TIMER(pObj) - self->t0;
long long it = tt - self->subt;
_PyTime_t tt = call_timer(pObj) - self->t0;
_PyTime_t it = tt - self->subt;
if (self->previous)
self->previous->subt += tt;
pObj->currentProfilerContext = self->previous;
Expand Down Expand Up @@ -631,12 +580,14 @@ profiler_getstats(ProfilerObject *pObj, PyObject* noarg)
statscollector_t collect;
if (pending_exception(pObj))
return NULL;
if (!pObj->externalTimer)
collect.factor = hpTimerUnit();
else if (pObj->externalTimerUnit > 0.0)
if (!pObj->externalTimer || pObj->externalTimerUnit == 0.0) {
_PyTime_t onesec = _PyTime_FromSeconds(1);
collect.factor = (double)1 / onesec;
}
else {
collect.factor = pObj->externalTimerUnit;
else
collect.factor = 1.0 / DOUBLE_TIMER_PRECISION;
}

collect.list = PyList_New(0);
if (collect.list == NULL)
return NULL;
Expand Down Expand Up @@ -882,7 +833,6 @@ PyInit__lsprof(void)
(PyObject*) &StatsEntryType);
PyModule_AddObject(module, "profiler_subentry",
(PyObject*) &StatsSubEntryType);
empty_tuple = PyTuple_New(0);
initialized = 1;
return module;
}