Project

General

Profile

Actions

Feature #12463

closed

ruby lacks plus-plus

Feature #12463: ruby lacks plus-plus

Added by shyouhei (Shyouhei Urabe) over 9 years ago. Updated over 8 years ago.

Status:
Rejected
Target version:
-
[ruby-core:75859]

Description

From c0f0d0c5f5c58d56fec95ca4303a0f5db5b54d56 Mon Sep 17 00:00:00 2001
From: "Urabe, Shyouhei"
Date: Mon, 6 Jun 2016 17:07:11 +0900
Subject: [PATCH 1/1] ruby lacks plus-plus operation.

This commit adds opt_plusplus VM instruction. Before this, a ruby
snippet foo += 1 was converted to up to 4 VM instructions. By
unifying these instructions into one, we can avoid unnecessary stack
manipulations and can issue dedicated machine instruction, if any.

This speeds up things. A set of microbenchmark results shows that it
gains up to 180% boost on a very tight loop.


raw data:

[["loop_whileloop", [[0.624962, 0.579907, 0.562363, 0.579649, 0.589442, 0.595911, 0.583657], [0.317089, 0.337159, 0.311376, 0.311721, 0.317585, 0.310999, 0.313107]]], ["loop_whileloop2", [[0.129038, 0.137306, 0.13944, 0.121268, 0.120862, 0.121065, 0.135851], [0.072271, 0.074998, 0.080031, 0.078602, 0.07527, 0.07627, 0.072029]]]] 

Elapsed time: 7.775304 (sec)


benchmark results:
minimum results in each 7 measurements.
Execution time (sec)

name | trunk| ours
---------------+------+--------
loop_whileloop | 0.562| 0.311
loop_whileloop2| 0.121| 0.072

Speedup ratio: compare with the result of `trunk' (greater is better)

name | ours
---------------+------
loop_whileloop | 1.808
loop_whileloop2| 1.678


Also for a nontrivial benchmark, optcarrot speeds up from 31.14 fps to
31.98 fps (102.6%). This is not that huge gain (disappointed), but it
does speeds up more or less.


raw data:

[["optcarrot", [[30.403074843558894, 30.310258944580042, 32.71944350007326], [31.959367571755536, 31.721515210358586, 32.26915908959967]]]] 

* configure.in (__builtin_add_overflow): check existence of this compiler intrinsic which is available in recent gcc and clang, with expectedly-optimal implementation of overflow detection.  * insns.def (opt_plusplus): new instruction. This unified instruction is expected to appear when some local variable is `+=1`ed. That was converted to 4 separate instructions before (load variable, load immediate one, call plus, store variable).  * insns.def (opt_plus): refactor move implementation to share with opt_plusplus.  * vm_insnhelper.c (opt_plus_func): moved to here.  * compile.c (iseq_peephole_optimize): generate opt_plusplus when possible.  Signed-off-by: Urabe, Shyouhei <shyouhei@ruby-lang.org> ---  compile.c | 46 ++++++++++++++++++++++++ configure.in | 1 + insns.def | 109 +++++++++++++++++++++++++++++++++++--------------------- vm_insnhelper.c | 62 ++++++++++++++++++++++++++++++++ 4 files changed, 177 insertions(+), 41 deletions(-)  diff --git a/compile.c b/compile.c index cc496cb..6d99d0b 100644 --- a/compile.c +++ b/compile.c @@ -2254,6 +2254,52 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal	} } + if (IS_INSN_ID(iobj, getlocal)) { +	/* + * getlocal_OP__WC__0 + * putobject_OP_INT2FIX_O_1_C_ + * opt_plus + * setlocal_OP__WC__0 + * => + * opt_plusplus + */ +	INSN *nobj = (INSN *)iobj->link.next; +	VALUE idx = OPERAND_AT(iobj, 0); +	VALUE lev = OPERAND_AT(iobj, 1); +	if (IS_INSN((LINK_ELEMENT *)nobj) && + IS_INSN_ID(nobj, putobject)) { + if(OPERAND_AT(nobj, 0) == INT2FIX(1)) { +	nobj = (INSN * )nobj->link.next; +	if (IS_INSN((LINK_ELEMENT *)nobj) && + IS_INSN_ID(nobj, send)) { + struct rb_call_info *ci = (void *)OPERAND_AT(nobj, 0); + struct rb_call_cache *cc = (void *)OPERAND_AT(nobj, 1); + if ((ci->flag & VM_CALL_ARGS_SIMPLE) && +	(ci->orig_argc == 1) && +	(ci->mid == idPLUS)) { +	nobj = (INSN *)nobj->link.next; +	if (IS_INSN((LINK_ELEMENT *)nobj) && + IS_INSN_ID(nobj, setlocal)) { + if ((OPERAND_AT(nobj, 0) == idx) && +	(OPERAND_AT(nobj, 1) == lev)) { +	INSN *opt_plusplus = new_insn_body( + iseq, iobj->line_no, BIN(opt_plusplus), + 4, idx, lev, (VALUE)ci, (VALUE)cc); +	nobj = (INSN * )nobj->link.next; +	REMOVE_ELEM(nobj->link.prev->prev->prev->prev); +	REMOVE_ELEM(nobj->link.prev->prev->prev); +	REMOVE_ELEM(nobj->link.prev->prev); +	REMOVE_ELEM(nobj->link.prev); +	INSERT_ELEM_PREV( + &nobj->link, (LINK_ELEMENT *)opt_plusplus); + } +	} + } +	} + } +	} + } +  if (do_tailcallopt &&	(IS_INSN_ID(iobj, send) || IS_INSN_ID(iobj, opt_aref_with) || diff --git a/configure.in b/configure.in index 063d839..e0b7a91 100644 --- a/configure.in +++ b/configure.in @@ -2480,6 +2480,7 @@ if test x$rb_cv_builtin___builtin_choose_expr = xyes; then ]) fi RUBY_CHECK_BUILTIN_FUNC(__builtin_types_compatible_p, [__builtin_types_compatible_p(int, int)]) +RUBY_CHECK_BUILTIN_FUNC(__builtin_add_overflow, [__builtin_add_overflow(0, 0, (int*)0)])  if test "$ac_cv_func_qsort_r" != no; then AC_CACHE_CHECK(whether qsort_r is GNU version, rb_cv_gnu_qsort_r, diff --git a/insns.def b/insns.def index cdc8287..1438e29 100644 --- a/insns.def +++ b/insns.def @@ -1377,47 +1377,7 @@ opt_plus (VALUE recv, VALUE obj) (VALUE val) { - if (FIXNUM_2_P(recv, obj) && -	BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) { -	/* fixnum + fixnum */ -#ifndef LONG_LONG_VALUE -	val = (recv + (obj & (~1))); -	if ((~(recv ^ obj) & (recv ^ val)) & - ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) { - val = rb_big_plus(rb_int2big(FIX2LONG(recv)), - rb_int2big(FIX2LONG(obj))); -	} -#else -	long a, b, c; -	a = FIX2LONG(recv); -	b = FIX2LONG(obj); -	c = a + b; -	val = LONG2NUM(c); -#endif - } - else if (FLONUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) { -	val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); - } - else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { -	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat && - BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) { - val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); -	} -	else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString && - BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) { - val = rb_str_plus(recv, obj); -	} -	else if (RBASIC_CLASS(recv) == rb_cArray && - BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) { - val = rb_ary_plus(recv, obj); -	} -	else { - goto INSN_LABEL(normal_dispatch); -	} - } - else { -	INSN_LABEL(normal_dispatch): + if ((val = opt_plus_func(recv, obj)) == Qundef) { 	PUSH(recv);	PUSH(obj);	CALL_SIMPLE_METHOD(recv); @@ -2163,6 +2123,73 @@ opt_call_c_function } /** + @c optimize + @e unified instruction that does getlocal + putobject + opt_plus + setlocal. + */ +DEFINE_INSN +opt_plusplus +(lindex_t idx, rb_num_t level, CALL_INFO ci, CALL_CACHE cc) +() +() +{ + int const lv = level; + VALUE *ep = GET_EP(); + VALUE *ptr; + VALUE tmp; + int i; + + for (i = 0; i < lv; i++) { +	ep = GET_PREV_EP(ep); + } + ptr = ep - idx; + + if (FIXNUM_P(*ptr) && + BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) { +#if defined LONG_LONG_VALUE + long a, b; + a = FIX2LONG(*ptr); + b = a + 1; + *ptr = LONG2NUM(b); +#elif defined HAVE_BUILTIN___BUILTIN_ADD_OVERFLOW + const signed long one = INT2FIX(1) & ~RUBY_FIXNUM_FLAG; + signed long ret; + if (UNLIKELY(__builtin_saddl_overflow((signed long)*ptr, one, &ret))) { + *ptr = rb_big_plus(rb_int2big(FIX2LONG(*ptr)), + rb_int2big(FIX2LONG(1))); + } + else { + *ptr = (VALUE)ret; + } +#else + const VALUE a, b, c; + a = *ptr; + b = INT2FIX(1) & ~RUBY_FIXNUM_FLAG; + c = a + b; + if ((~(a ^ b) & (a ^ c)) & + ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) { + *ptr = rb_big_plus(rb_int2big(FIX2LONG(a)), + rb_int2big(FIX2LONG(b))); + } + else { + *ptr = c; + } +#endif + } + else if ((tmp = opt_plus_func(*ptr, INT2FIX(1))) != Qundef) { + *ptr = tmp; + } + else { + VALUE val; + VALUE recv = *ptr; + +	PUSH(recv); +	PUSH(INT2FIX(1)); +	CALL_SIMPLE_METHOD(recv); + *ptr = val; + } +} + +/**  @c joke @e BLT @j BLT diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 691c37a..4ff21c5 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1209,6 +1209,68 @@ rb_equal_opt(VALUE obj1, VALUE obj2) return opt_eq_func(obj1, obj2, &ci, &cc); } +static inline VALUE +fixnum_plus_fixnum(VALUE i, VALUE j) +{ +#if defined LONG_LONG_VALUE + long a, b, c; + a = FIX2LONG(i); + b = FIX2LONG(j); + c = a + b; + return LONG2NUM(c); +#elif defined HAVE_BUILTIN___BUILTIN_ADD_OVERFLOW + signed long ret; + if (__builtin_saddl_overflow(i, j & ~RUBY_FIXNUM_FLAG, &ret)) { +	return rb_big_plus(rb_int2big(FIX2LONG(i)), + rb_int2big(FIX2LONG(j))); + } + else { +	return (VALUE)ret; + } +#else + VALUE val = (i + (j & ~RUBY_FIXNUM_FLAG)); + if ((~(i ^ j) & (i ^ val)) & +	((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) { +	val = rb_big_plus(rb_int2big(FIX2LONG(i)), + rb_int2big(FIX2LONG(j))); + } + return val; +#endif +} + +static inline VALUE +opt_plus_func(VALUE recv, VALUE obj) +{ + if (FIXNUM_2_P(recv, obj) && +	BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) { +	return fixnum_plus_fixnum(recv, obj); + } + else if (FLONUM_2_P(recv, obj) && + BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) { +	return DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); + } + else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { +	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat && + BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) { + return DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); +	} +	else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString && + BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) { + return rb_str_plus(recv, obj); +	} +	else if (RBASIC_CLASS(recv) == rb_cArray && + BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) { + return rb_ary_plus(recv, obj); +	} +	else { + return Qundef; +	} + } + else { +	return Qundef; + } +} +  static VALUE vm_call0(rb_thread_t*, VALUE, ID, int, const VALUE*, const rb_callable_method_entry_t *); static VALUE -- 2.8.3  

Related issues 1 (0 open1 closed)

Actions

Also available in: PDF Atom