Skip to content

Commit 737f8d5

Browse files
committed
remove pointless SVPV mortal alloc in S_strip_spaces()/prototype parser
Valid, parseable, and sane prototypes, are tiny in char len and often fit on 1 hand, most extreme cases in production would be 80 chars, because terminal length. Original commit referred to mitigating sloppy XS code with random white space. Majority of XS code will have perfect clean prototype strings. So not SV Alloc, PV Alloc, MEXTENDPUSH, memcpy(), and alot more free()s in scope _dec(), for clean strings. Even for dirty but parsable prototypes, they will be tiny in bytes. Therefore use a tiny stack buffer for dirty semi-hot path to remove overhead. Fuzzing, junk, abuse, can OOM die in newSV()/malloc() if needed, same as in prior version of the code. Use newSV(len) and POK_off, SV head is private to us, and a waste to bookkeep SVPV details. SAVEFREEPV() was not used, because previous code did mortal, and not SAVEFREEPV(), so keep using mortal. This can be changed if someone has rational to do it. 64 bytes was picked since its power of 2, and below 80, but not 16 or 32 bytes which is too small and C compiler would probably be leaving padding at the end of the array anyways. Going to 80 or 100, was discarded, to give breathing room on C stack to C compilers so on x86/x64 C compilers can emit U8 arithmitic with U8 constants to access various C autos, and the CC doesn't downgrade to U32 constants to access various local C auto. Worst case production Perl code for prototype length, is a URL argument template system on CPAN, and even urls, are unlikely to be longer than a desktop's URL bar or 80 char terminals. So 64 byte was picked. The struct provides type matching and length safety between S_strip_spaces() and its callers.
1 parent 82c4939 commit 737f8d5

File tree

3 files changed

+62
-18
lines changed

3 files changed

+62
-18
lines changed

inline.h

Lines changed: 52 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -260,27 +260,65 @@ Perl_CvDEPTH(const CV * const sv)
260260
was added by toke.c, but is generally not the case if it was added elsewhere.
261261
Since we can't enforce the spacelessness at assignment time, this routine
262262
provides a temporary copy at parse time with spaces removed.
263+
263264
I<orig> is the start of the original buffer, I<len> is the length of the
264-
prototype and will be updated when this returns.
265+
prototype and will be updated or stay the same when this returns.
266+
I<small_buf> is an optional small C stack char array buffer
267+
of type I<STRIP_WS_BUF_T>, Caller must use I<STRIP_WS_PICK_BUF_TYPE(&buf,len)>
268+
to pick if I<small_buf> will be used or not.
269+
270+
If I<small_buf> is not used a temporary self-freeing new buffer
271+
(mortal but subject to change), will be malloc-ed and returned. The return
272+
pointer can be the original I<orig> ptr, or I<small_buf> ptr, or a temporary
273+
already mortalized I<malloc> memory. It is supposed to be opaque how the retval
274+
I<char *> was allocated. It will not leak, and you can't take ownership of it.
275+
It will free itself somehow, in the near future when you leave the current C
276+
function scope.
265277
*/
266278

267279
#ifdef PERL_CORE
280+
# if defined(PERL_IN_OP_C) || defined(PERL_IN_TOKE_C)
281+
282+
typedef struct {
283+
char small_buf [64];
284+
} STRIP_WS_BUF_T;
285+
286+
#define STRIP_WS_PICK_BUF_TYPE(_buf, _len) \
287+
(((_len) < (sizeof(STRIP_WS_BUF_T)-2)) ? (_buf) : NULL)
288+
#define strip_spaces(_smlb, _op, _plen) \
289+
S_strip_spaces(aTHX_ (_smlb), (_op), (_plen))
290+
268291
PERL_STATIC_INLINE char *
269-
S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
292+
S_strip_spaces(pTHX_ STRIP_WS_BUF_T * small_buf, const char * orig, STRLEN * const ptolen)
270293
{
271-
SV * tmpsv;
272-
char * tmps;
273-
tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
274-
tmps = SvPVX(tmpsv);
275-
while ((*len)--) {
276-
if (!isSPACE(*orig))
277-
*tmps++ = *orig;
278-
orig++;
279-
}
280-
*tmps = '\0';
281-
*len = tmps - SvPVX(tmpsv);
282-
return SvPVX(tmpsv);
294+
STRLEN len = *ptolen;
295+
const char * orig_p = orig;
296+
const char * orig_end = orig + len;
297+
298+
while (orig_p < orig_end) {
299+
if(isSPACE(*orig_p)) { /* no memchr()! think memspn() */
300+
char * tmps;
301+
char * tmps_p;
302+
if(small_buf)
303+
tmps = (char *)&small_buf->small_buf[0];
304+
else
305+
tmps = SvPVX(sv_2mortal(newSV(len+1)));
306+
tmps_p = tmps+(orig_p-orig);
307+
Move(orig, tmps, (Size_t)(orig_p-orig), char);
308+
while (orig_p < orig_end) {
309+
if(!isSPACE(*orig_p))
310+
*tmps_p++ = *orig_p;
311+
orig_p++;
312+
}
313+
*tmps_p = '\0';
314+
*ptolen = tmps_p - tmps;
315+
return tmps; /* return cleaned string */
316+
}
317+
orig_p++;
318+
} /* ptolen was untouched */
319+
return (char *)orig; /* original string unmodified */
283320
}
321+
# endif
284322
#endif
285323

286324
/* ------------------------------- iperlsys.h ------------------------------- */

op.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9977,6 +9977,8 @@ void
99779977
Perl_cv_ckproto_len_flags(pTHX_ const CV *cv, const GV *gv, const char *p,
99789978
const STRLEN len, const U32 flags)
99799979
{
9980+
STRIP_WS_BUF_T pbuf;
9981+
STRIP_WS_BUF_T cbuf;
99809982
SV *name = NULL, *msg;
99819983
const char * cvp = SvROK(cv)
99829984
? SvTYPE(SvRV_const(cv)) == SVt_PVCV
@@ -9994,8 +9996,8 @@ Perl_cv_ckproto_len_flags(pTHX_ const CV *cv, const GV *gv, const char *p,
99949996
return;
99959997

99969998
if (p && cvp) {
9997-
p = S_strip_spaces(aTHX_ p, &plen);
9998-
cvp = S_strip_spaces(aTHX_ cvp, &clen);
9999+
p = strip_spaces(STRIP_WS_PICK_BUF_TYPE(&pbuf, plen), p, &plen);
10000+
cvp = strip_spaces(STRIP_WS_PICK_BUF_TYPE(&cbuf, clen), cvp, &clen);
999910001
if ((flags & SVf_UTF8) == SvUTF8(cv)) {
1000010002
if (plen == clen && memEQ(cvp, p, plen))
1000110003
return;
@@ -14362,6 +14364,7 @@ OP *
1436214364
Perl_ck_entersub_args_proto(pTHX_ OP *entersubop, GV *namegv, SV *protosv)
1436314365
{
1436414366
STRLEN proto_len;
14367+
STRIP_WS_BUF_T protocleanbuf;
1436514368
const char *proto, *proto_end;
1436614369
OP *aop, *prev, *cvop, *parent;
1436714370
int optional = 0;
@@ -14375,7 +14378,8 @@ Perl_ck_entersub_args_proto(pTHX_ OP *entersubop, GV *namegv, SV *protosv)
1437514378
if (SvTYPE(protosv) == SVt_PVCV)
1437614379
proto = CvPROTO(protosv), proto_len = CvPROTOLEN(protosv);
1437714380
else proto = SvPV(protosv, proto_len);
14378-
proto = S_strip_spaces(aTHX_ proto, &proto_len);
14381+
proto = strip_spaces( STRIP_WS_PICK_BUF_TYPE(&protocleanbuf, proto_len),
14382+
proto, &proto_len);
1437914383
proto_end = proto + proto_len;
1438014384
parent = entersubop;
1438114385
aop = cUNOPx(entersubop)->op_first;

toke.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6281,11 +6281,13 @@ yyl_colon(pTHX_ char *s)
62816281
static int
62826282
yyl_subproto(pTHX_ char *s, CV *cv)
62836283
{
6284+
STRIP_WS_BUF_T protocleanbuf;
62846285
STRLEN protolen = CvPROTOLEN(cv);
62856286
const char *proto = CvPROTO(cv);
62866287
bool optional;
62876288

6288-
proto = S_strip_spaces(aTHX_ proto, &protolen);
6289+
proto = strip_spaces( STRIP_WS_PICK_BUF_TYPE(&protocleanbuf, protolen),
6290+
proto, &protolen);
62896291
if (!protolen)
62906292
TERM(FUNC0SUB);
62916293
if ((optional = *proto == ';')) {

0 commit comments

Comments
 (0)