ARM-software
diff --git a/‎math/aarch64/advsimd/log10p1.c‎
Lines changed: 138 additions & 0 deletions b/‎math/aarch64/advsimd/log10p1.c‎
Lines changed: 138 additions & 0 deletions
diff --git a/‎math/include/mathlib.h‎
Lines changed: 1 addition & 0 deletions b/‎math/include/mathlib.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎math/test/c23_references.h‎
Lines changed: 10 additions & 1 deletion b/‎math/test/c23_references.h‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎math/test/mathbench_funcs.h‎
Lines changed: 1 addition & 0 deletions b/‎math/test/mathbench_funcs.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎math/test/ulp_funcs.h‎
Lines changed: 2 additions & 1 deletion b/‎math/test/ulp_funcs.h‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎math/test/ulp_wrappers.h‎
Lines changed: 1 addition & 0 deletions b/‎math/test/ulp_wrappers.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎math/tools/log10p1.sollya‎
Lines changed: 21 additions & 0 deletions b/‎math/tools/log10p1.sollya‎
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,138 @@
+/*
+ * Double-precision vector log10(1+x) function.
+ *
+ * Copyright (c) 2025, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "test_defs.h"
+#include "v_math.h"
+
+/* Constants for the double-precision vector log10p1 routine.
+
+   Member order matters: the scalar doubles are fetched two at a time with
+   vld1q_f64 (c5/c7, c9/c11, c13/c15, c17/c19 and inv_log2_10/inv_ln10), so
+   each of those pairs must stay adjacent and in this order.  */
+static const struct data
+{
+ float64x2_t c1, c2, c3, c4, c6, c8, c10, c12, c14, c16, c18, c20;
+ double c5, c7, c9, c11, c13, c15, c17, c19;
+ double inv_log2_10, inv_ln10, one, minus_one;
+ int64x2_t one_top;
+ uint64x2_t one_m_hf_rt2_top, umask, hf_rt2_top, bottom_mask;
+ uint64x2_t inf, minf, nan;
+} data = {
+ /* Coefficients generated using FPMinimax deg=20, in
+ [sqrt(2)/2-1, sqrt(2)-1]. */
+ .c1 = V2 (0x1.bcb7b1526e50fp-2),
+ .c2 = V2 (-0x1.bcb7b1526e4fep-3),
+ .c3 = V2 (0x1.287a7636f3cfp-3),
+ .c4 = V2 (-0x1.bcb7b152707cap-4),
+ .c5 = 0x1.63c62775e6667p-4,
+ .c6 = V2 (-0x1.287a76368cf37p-4),
+ .c7 = 0x1.fc3fa57ed69cep-5,
+ .c8 = V2 (-0x1.bcb7b0eed8335p-5),
+ .c9 = 0x1.8b4e10d4ecd69p-5,
+ .c10 = V2 (-0x1.63c65554f2db4p-5),
+ .c11 = 0x1.436b003e5358p-5,
+ .c12 = V2 (-0x1.2872d378b6363p-5),
+ .c13 = 0x1.11e15dd8ac0efp-5,
+ .c14 = V2 (-0x1.fd8dc08e6b21p-6),
+ .c15 = 0x1.d6fabf7e5c622p-6,
+ .c16 = V2 (-0x1.ad53855566e62p-6),
+ .c17 = 0x1.a9547f6043884p-6,
+ .c18 = V2 (-0x1.e4a167fcd3e22p-6),
+ .c19 = 0x1.c2f6859a15a65p-6,
+ .c20 = V2 (-0x1.91c6df82d809bp-7),
+ /* Bit patterns used to rebuild the top word of the reduced argument.  */
+ .hf_rt2_top = V2 (0x3fe6a09e00000000),
+ .one_m_hf_rt2_top = V2 (0x00095f6200000000),
+ .umask = V2 (0x000fffff00000000),
+ .one_top = V2 (0x3ff),
+ .inv_ln10 = 0x1.bcb7b1526e50ep-2,
+ .inv_log2_10 = 0x1.34413509f79ffp-2,
+ /* Bit patterns selected by special_case.  */
+ .inf = V2 (0x7ff0000000000000),
+ .minf = V2 (0xfff0000000000000),
+ .nan = V2 (0x7fffffffffffffff),
+ .bottom_mask = V2 (0xffffffff),
+ /* Double literals, matching the type of the fields.  */
+ .minus_one = -1.0,
+ .one = 1.0,
+};
+
+/* Replace the lanes selected by cmp with a special result and keep y in the
+   others.  A selected lane becomes +inf when x >= +inf, -inf when x >= -1
+   (among the lanes the caller flags this is x == -1), and NaN otherwise.  */
+static inline float64x2_t
+special_case (const struct data *d, float64x2_t x, float64x2_t y,
+ uint64x2_t cmp)
+{
+ float64x2_t pos_inf = vreinterpretq_f64_u64 (d->inf);
+ float64x2_t neg_one = v_f64 (d->minus_one);
+
+ uint64x2_t is_pos_inf = vcgeq_f64 (x, pos_inf);
+ uint64x2_t ge_neg_one = vcgeq_f64 (x, neg_one);
+
+ /* Choose between -inf and NaN first, then let +inf take precedence.  */
+ uint64x2_t minf_or_nan = vbslq_u64 (ge_neg_one, d->minf, d->nan);
+ uint64x2_t special_bits = vbslq_u64 (is_pos_inf, d->inf, minf_or_nan);
+
+ return vbslq_f64 (cmp, vreinterpretq_f64_u64 (special_bits), y);
+}
+
+/* Vector log10p1 approximation using polynomial on reduced interval.
+ m = 1 + x is split as 2^k * (1 + f) with f in [sqrt(2)/2 - 1, sqrt(2) - 1],
+ a polynomial in f is evaluated, and a correction for the rounding of 1 + x
+ is added. Lanes with x <= -1 or x >= +inf are fixed up in special_case.
+ Worst-case error is 2.69 ULP:
+ _ZGVnN2v_log10p1(-0x1.2582542cd267p-15) got -0x1.fde2ee0eb629p-17
+ want -0x1.fde2ee0eb628dp-17 . */
+VPCS_ATTR float64x2_t V_NAME_D1 (log10p1) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Calculate scaling factor k: add the offset one_m_hf_rt2_top to the bits
+ of m = 1 + x so that the exponent field steps at sqrt(2)/2 rather than 1,
+ then shift the exponent down and remove the bias (one_top). */
+ float64x2_t m = vaddq_f64 (x, v_f64 (d->one));
+ uint64x2_t mi = vreinterpretq_u64_f64 (m);
+ uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top);
+ int64x2_t ki
+ = vsubq_s64 (vreinterpretq_s64_u64 (vshrq_n_u64 (u, 52)), d->one_top);
+ float64x2_t k = vcvtq_f64_s64 (ki);
+
+ /* Reduce m into [sqrt(2)/2, sqrt(2)] by rebuilding its top word (the low
+ 32 bits are taken from m unchanged), then subtract 1 to obtain f in
+ [sqrt(2)/2 - 1, sqrt(2) - 1]. */
+ uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top);
+ uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, d->bottom_mask));
+ float64x2_t f
+ = vaddq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (d->minus_one));
+ /* Correction term c/m, where c = x - (m - 1) is the part of x that was
+ lost when m = 1 + x was rounded. */
+ float64x2_t cm
+ = vdivq_f64 (vsubq_f64 (x, vaddq_f64 (m, v_f64 (d->minus_one))), m);
+
+ /* Pairwise Horner evaluation scheme: the pairs c(2i) + f * c(2i+1) are
+ combined in powers of f2 to give c2 + c3 f + ... + c20 f^18, and a last
+ step in f gives p = c1 + c2 f + ... + c20 f^19. The odd coefficients
+ are loaded two at a time and selected by lane. */
+ float64x2_t f2 = vmulq_f64 (f, f);
+ float64x2_t c57 = vld1q_f64 (&d->c5);
+ float64x2_t c911 = vld1q_f64 (&d->c9);
+ float64x2_t c1315 = vld1q_f64 (&d->c13);
+ float64x2_t c1719 = vld1q_f64 (&d->c17);
+ float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, f, c1719, 1);
+ float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, f, c1719, 0);
+ float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, f, c1315, 1);
+ float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, f, c1315, 0);
+ float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, f, c911, 1);
+ float64x2_t p89 = vfmaq_laneq_f64 (d->c8, f, c911, 0);
+
+ float64x2_t p67 = vfmaq_laneq_f64 (d->c6, f, c57, 1);
+ float64x2_t p45 = vfmaq_laneq_f64 (d->c4, f, c57, 0);
+ float64x2_t p23 = vfmaq_f64 (d->c2, f, d->c3);
+ float64x2_t p = vfmaq_f64 (p1819, f2, d->c20);
+ p = vfmaq_f64 (p1617, f2, p);
+ p = vfmaq_f64 (p1415, f2, p);
+ p = vfmaq_f64 (p1213, f2, p);
+ p = vfmaq_f64 (p1011, f2, p);
+ p = vfmaq_f64 (p89, f2, p);
+ p = vfmaq_f64 (p67, f2, p);
+ p = vfmaq_f64 (p45, f2, p);
+ p = vfmaq_f64 (p23, f2, p);
+ p = vfmaq_f64 (d->c1, f, p);
+
+ float64x2_t inv_log_consts = vld1q_f64 (&d->inv_log2_10);
+ /* Assemble log10p1(x) = k/log2(10) + log10p1(f) + c/(m * ln10), with
+ log10p1(f) approximated by p * f. Lane 0 of inv_log_consts is
+ inv_log2_10 and lane 1 is inv_ln10. */
+ float64x2_t y = vfmaq_laneq_f64 (vmulq_f64 (p, f), k, inv_log_consts, 0);
+ y = vfmaq_laneq_f64 (y, cm, inv_log_consts, 1);
+
+ /* Special cases: lanes with x <= -1 (which includes -inf) or x >= +inf
+ are patched by special_case. NaN lanes fail both comparisons and so
+ keep the value already computed in y. */
+ uint64x2_t special
+ = vorrq_u64 (vcleq_f64 (x, v_f64 (d->minus_one)),
+ vcgeq_f64 (x, vreinterpretq_f64_u64 (d->inf)));
+ if (unlikely (v_any_u64 (special)))
+ return special_case (d, x, y, special);
+ return y;
+}
+#if WANT_C23_TESTS
+TEST_ULP (V_NAME_D1 (log10p1), 2.20) /* NOTE(review): the routine's header
+ comment quotes a 2.69 ULP worst case; presumably this limit is expressed
+ as the error beyond 0.5 ULP (2.69 - 0.5, rounded up) - confirm against
+ the test harness. */
+TEST_SYM_INTERVAL (V_NAME_D1 (log10p1), 0.0, 0x1p-23, 30000)
+TEST_SYM_INTERVAL (V_NAME_D1 (log10p1), 0x1p-23, 1, 50000)
+TEST_INTERVAL (V_NAME_D1 (log10p1), 1, inf, 50000)
+TEST_INTERVAL (V_NAME_D1 (log10p1), -1.0, -inf, 1000)
+#endif
@@ -179,6 +179,7 @@ __vpcs float64x2_t _ZGVnN2v_exp2m1 (float64x2_t);
 __vpcs float64x2_t _ZGVnN2v_expm1 (float64x2_t);
 __vpcs float64x2_t _ZGVnN2v_log (float64x2_t);
 __vpcs float64x2_t _ZGVnN2v_log10 (float64x2_t);
+__vpcs float64x2_t _ZGVnN2v_log10p1 (float64x2_t);
 __vpcs float64x2_t _ZGVnN2v_log1p (float64x2_t);
 __vpcs float64x2_t _ZGVnN2v_log2 (float64x2_t);
 __vpcs float64x2_t _ZGVnN2v_log2p1 (float64x2_t);
 
@@ -11,11 +11,14 @@
 # define M_PIl 3.141592653589793238462643383279502884l
 #endif
 #ifndef M_INV_LOG2l
-# define M_INV_LOG2l 0x1.71547652b82fep+0
+# define M_INV_LOG2l 0x1.71547652b82fe1777d0ffda0d23a7d11d6aef551cp+0l /* 1/ln(2); the l suffix keeps the extra digits as long double.  */
 #endif
 #ifndef M_INV_LOG10
 # define M_INV_LOG10 0x1.bcb7b1526e50ep-2
 #endif
+#ifndef M_INV_LOG10l
+# define M_INV_LOG10l 0x1.bcb7b1526e50e32a6ab7555f5a67b8647dc68c049p-2l
+#endif
 #ifndef M_LOG2
 # define M_LOG2 0x1.62e42fefa39efp-1
 #endif
@@ -211,3 +214,9 @@ arm_math_log10p1 (double x)
 {
  return log1p (x) * M_INV_LOG10;
 }
+
+/* Long double reference for log10p1: log1pl (x) scaled by M_INV_LOG10l.  */
+long double
+arm_math_log10p1l (long double x)
+{
+ const long double ln_1px = log1pl (x);
+ return ln_1px * M_INV_LOG10l;
+}
@@ -81,6 +81,7 @@ VND (_ZGVnN2v_exp2m1, -10.0, 10.0)
 VNF (_ZGVnN4v_log2p1f, -0.9, 10)
 VND (_ZGVnN2v_log2p1, -0.9, 10)
 VNF (_ZGVnN4v_log10p1f, -0.9, 10)
+VND (_ZGVnN2v_log10p1, -0.9, 10)
 VNF (_ZGVnN4v_sinpif, -0.9, 0.9)
 VND (_ZGVnN2v_sinpi, -0.9, 0.9)
 VNF (_ZGVnN4v_tanpif, -0.9, 0.9)
 
@@ -89,8 +89,9 @@ SVF (_ZGVsMxvl8_modf_int, sv_modf_int, modfl_int, modf_mpfr_int, 1, 0, d1, 0)
  F (_ZGVnN4v_exp2m1f, Z_exp2m1f, arm_math_exp2m1, mpfr_exp2m1, 1, 1, f1, 0)
  F (_ZGVnN2v_exp2m1, Z_exp2m1, arm_math_exp2m1l, mpfr_exp2m1, 1, 0, d1, 0)
  F (_ZGVnN4v_log2p1f, Z_log2p1f, arm_math_log2p1, mpfr_log2p1, 1, 1, f1, 0)
-  F (_ZGVnN2v_log2p1, Z_log2p1, arm_math_log2p1l, mpfr_log2p1, 1, 0, d1, 0)
+ F (_ZGVnN2v_log2p1, Z_log2p1, arm_math_log2p1l, mpfr_log2p1, 1, 0, d1, 0)
  F (_ZGVnN4v_log10p1f, Z_log10p1f, arm_math_log10p1, mpfr_log10p1, 1, 1, f1, 0)
+ F (_ZGVnN2v_log10p1, Z_log10p1, arm_math_log10p1l, mpfr_log10p1, 1, 0, d1, 0)
  F (_ZGVnN4v_sinpif, Z_sinpif, arm_math_sinpi, mpfr_sinpi, 1, 1, f1, 0)
  F (_ZGVnN2v_sinpi, Z_sinpi, arm_math_sinpil, mpfr_sinpi, 1, 0, d1, 0)
  F (_ZGVnN4v_tanpif, Z_tanpif, arm_math_tanpi, mpfr_tanpi, 1, 1, f1, 0)
 
@@ -247,6 +247,7 @@ ZVND1_WRAP (exp2m1)
 ZVNF1_WRAP (log2p1)
 ZVND1_WRAP (log2p1)
 ZVNF1_WRAP (log10p1)
+ZVND1_WRAP (log10p1)
 ZVNF1_WRAP (sinpi)
 ZVND1_WRAP (sinpi)
 ZVNF1_WRAP (tanpi)
 
@@ -0,0 +1,21 @@
+// polynomial for approximating log10(1+x) in double precision
+//
+// Copyright (c) 2025, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 20;
+
+// Fitting interval for the reduced argument.
+a = sqrt(2)/2-1;
+b = sqrt(2)-1;
+
+// Function being approximated.
+f = proc(y) {
+ return log10(1+y);
+};
+
+poly = fpminimax(f(x), deg, [|double ...|], [a;b]);
+
+print("coeffs:");
+display = hexadecimal;
+// Prints coefficients 0..deg; log10p1.c only defines c1..c20.
+for i from 0 to deg do coeff(poly,i);
+// dirtyinfnorm takes (function, range) only, so no third argument is passed.
+print("rel error:", dirtyinfnorm(1-poly(x)/f(x), [a;b]));
+print("in [",a,b,"]");