Skip to content

Commit 7fc8510

Browse files
Lambxxblapie
authored andcommitted
math/aarch64/advsimd: Implement log10p1
Approach, uses coefficients for log10(x+1) and then does: log10p1(x) = k/log2(10) + log10p1(f) + c/(m * ln10) instead of log2p1(x) = k + log2p1(f) + c/(m * ln2) I have also tried using log2p1 and then multiple by log10(2) as last operation but gives ulp > 3.5
1 parent d07cb7c commit 7fc8510

File tree

7 files changed

+174
-2
lines changed

7 files changed

+174
-2
lines changed

math/aarch64/advsimd/log10p1.c

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
/*
2+
* Doube-precision vector log10(1+x) function.
3+
*
4+
* Copyright (c) 2025, Arm Limited.
5+
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6+
*/
7+
8+
#include "test_defs.h"
9+
#include "v_math.h"
10+
11+
static const struct data
12+
{
13+
float64x2_t c1, c2, c3, c4, c6, c8, c10, c12, c14, c16, c18, c20;
14+
double c5, c7, c9, c11, c13, c15, c17, c19;
15+
double inv_log2_10, inv_ln10, one, minus_one;
16+
int64x2_t one_top;
17+
uint64x2_t one_m_hf_rt2_top, umask, hf_rt2_top, bottom_mask;
18+
uint64x2_t inf, minf, nan;
19+
} data = {
20+
/* Coefficients generated using FPMinimax deg=20, in
21+
[sqrt(2)/2-1, sqrt(2)-1]. */
22+
.c1 = V2 (0x1.bcb7b1526e50fp-2),
23+
.c2 = V2 (-0x1.bcb7b1526e4fep-3),
24+
.c3 = V2 (0x1.287a7636f3cfp-3),
25+
.c4 = V2 (-0x1.bcb7b152707cap-4),
26+
.c5 = 0x1.63c62775e6667p-4,
27+
.c6 = V2 (-0x1.287a76368cf37p-4),
28+
.c7 = 0x1.fc3fa57ed69cep-5,
29+
.c8 = V2 (-0x1.bcb7b0eed8335p-5),
30+
.c9 = 0x1.8b4e10d4ecd69p-5,
31+
.c10 = V2 (-0x1.63c65554f2db4p-5),
32+
.c11 = 0x1.436b003e5358p-5,
33+
.c12 = V2 (-0x1.2872d378b6363p-5),
34+
.c13 = 0x1.11e15dd8ac0efp-5,
35+
.c14 = V2 (-0x1.fd8dc08e6b21p-6),
36+
.c15 = 0x1.d6fabf7e5c622p-6,
37+
.c16 = V2 (-0x1.ad53855566e62p-6),
38+
.c17 = 0x1.a9547f6043884p-6,
39+
.c18 = V2 (-0x1.e4a167fcd3e22p-6),
40+
.c19 = 0x1.c2f6859a15a65p-6,
41+
.c20 = V2 (-0x1.91c6df82d809bp-7),
42+
.hf_rt2_top = V2 (0x3fe6a09e00000000),
43+
.one_m_hf_rt2_top = V2 (0x00095f6200000000),
44+
.umask = V2 (0x000fffff00000000),
45+
.one_top = V2 (0x3ff),
46+
.inv_ln10 = 0x1.bcb7b1526e50ep-2,
47+
.inv_log2_10 = 0x1.34413509f79ffp-2,
48+
.inf = V2 (0x7ff0000000000000),
49+
.minf = V2 (0xfff0000000000000),
50+
.nan = V2 (0x7fffffffffffffff),
51+
.bottom_mask = V2 (0xffffffff),
52+
.minus_one = -1.0f,
53+
.one = 1.0f,
54+
};
55+
56+
static inline float64x2_t
57+
special_case (const struct data *d, float64x2_t x, float64x2_t y,
58+
uint64x2_t cmp)
59+
{
60+
uint64x2_t ret_inf = vcgeq_f64 (x, vreinterpretq_f64_u64 (d->inf));
61+
uint64x2_t neg_val
62+
= vbslq_u64 (vcgeq_f64 (x, v_f64 (d->minus_one)), d->minf, d->nan);
63+
float64x2_t s = vreinterpretq_f64_u64 (vbslq_u64 (ret_inf, d->inf, neg_val));
64+
return vbslq_f64 (cmp, s, y);
65+
}
66+
67+
/* Vector log10p1 approximation using polynomial on reduced interval.
68+
Worst-case error is 2.69 ULP:
69+
_ZGVnN2v_log10p1(-0x1.2582542cd267p-15) got -0x1.fde2ee0eb629p-17
70+
want -0x1.fde2ee0eb628dp-17 . */
71+
VPCS_ATTR float64x2_t V_NAME_D1 (log10p1) (float64x2_t x)
72+
{
73+
const struct data *d = ptr_barrier (&data);
74+
75+
/* Calculate scaling factor k. */
76+
float64x2_t m = vaddq_f64 (x, v_f64 (d->one));
77+
uint64x2_t mi = vreinterpretq_u64_f64 (m);
78+
uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top);
79+
int64x2_t ki
80+
= vsubq_s64 (vreinterpretq_s64_u64 (vshrq_n_u64 (u, 52)), d->one_top);
81+
float64x2_t k = vcvtq_f64_s64 (ki);
82+
83+
/* Reduce x to f in [sqrt(2)/2, sqrt (2)]. */
84+
uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top);
85+
uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, d->bottom_mask));
86+
float64x2_t f
87+
= vaddq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (d->minus_one));
88+
/* Correction term c/m. */
89+
float64x2_t cm
90+
= vdivq_f64 (vsubq_f64 (x, vaddq_f64 (m, v_f64 (d->minus_one))), m);
91+
92+
/* Order-18 Pairwise Horner evaluation scheme. */
93+
float64x2_t f2 = vmulq_f64 (f, f);
94+
float64x2_t c57 = vld1q_f64 (&d->c5);
95+
float64x2_t c911 = vld1q_f64 (&d->c9);
96+
float64x2_t c1315 = vld1q_f64 (&d->c13);
97+
float64x2_t c1719 = vld1q_f64 (&d->c17);
98+
float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, f, c1719, 1);
99+
float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, f, c1719, 0);
100+
float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, f, c1315, 1);
101+
float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, f, c1315, 0);
102+
float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, f, c911, 1);
103+
float64x2_t p89 = vfmaq_laneq_f64 (d->c8, f, c911, 0);
104+
105+
float64x2_t p67 = vfmaq_laneq_f64 (d->c6, f, c57, 1);
106+
float64x2_t p45 = vfmaq_laneq_f64 (d->c4, f, c57, 0);
107+
float64x2_t p23 = vfmaq_f64 (d->c2, f, d->c3);
108+
float64x2_t p = vfmaq_f64 (p1819, f2, d->c20);
109+
p = vfmaq_f64 (p1617, f2, p);
110+
p = vfmaq_f64 (p1415, f2, p);
111+
p = vfmaq_f64 (p1213, f2, p);
112+
p = vfmaq_f64 (p1011, f2, p);
113+
p = vfmaq_f64 (p89, f2, p);
114+
p = vfmaq_f64 (p67, f2, p);
115+
p = vfmaq_f64 (p45, f2, p);
116+
p = vfmaq_f64 (p23, f2, p);
117+
p = vfmaq_f64 (d->c1, f, p);
118+
119+
float64x2_t inv_log_consts = vld1q_f64 (&d->inv_log2_10);
120+
/* Assemble log10p1(x) = k/log2(10) + log10p1(f) + c/(m * ln10). */
121+
float64x2_t y = vfmaq_laneq_f64 (vmulq_f64 (p, f), k, inv_log_consts, 0);
122+
y = vfmaq_laneq_f64 (y, cm, inv_log_consts, 1);
123+
124+
/* Special cases: x == (-inf), x == nan, x <= -1 . */
125+
uint64x2_t special
126+
= vorrq_u64 (vcleq_f64 (x, v_f64 (d->minus_one)),
127+
vcgeq_f64 (x, vreinterpretq_f64_u64 (d->inf)));
128+
if (unlikely (v_any_u64 (special)))
129+
return special_case (d, x, y, special);
130+
return y;
131+
}
132+
#if WANT_C23_TESTS
133+
TEST_ULP (V_NAME_D1 (log10p1), 2.20)
134+
TEST_SYM_INTERVAL (V_NAME_D1 (log10p1), 0.0, 0x1p-23, 30000)
135+
TEST_SYM_INTERVAL (V_NAME_D1 (log10p1), 0x1p-23, 1, 50000)
136+
TEST_INTERVAL (V_NAME_D1 (log10p1), 1, inf, 50000)
137+
TEST_INTERVAL (V_NAME_D1 (log10p1), -1.0, -inf, 1000)
138+
#endif

math/include/mathlib.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ __vpcs float64x2_t _ZGVnN2v_exp2m1 (float64x2_t);
179179
__vpcs float64x2_t _ZGVnN2v_expm1 (float64x2_t);
180180
__vpcs float64x2_t _ZGVnN2v_log (float64x2_t);
181181
__vpcs float64x2_t _ZGVnN2v_log10 (float64x2_t);
182+
__vpcs float64x2_t _ZGVnN2v_log10p1 (float64x2_t);
182183
__vpcs float64x2_t _ZGVnN2v_log1p (float64x2_t);
183184
__vpcs float64x2_t _ZGVnN2v_log2 (float64x2_t);
184185
__vpcs float64x2_t _ZGVnN2v_log2p1 (float64x2_t);

math/test/c23_references.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,14 @@
1111
# define M_PIl 3.141592653589793238462643383279502884l
1212
#endif
1313
#ifndef M_INV_LOG2l
14-
# define M_INV_LOG2l 0x1.71547652b82fep+0
14+
# define M_INV_LOG2l 0x1.71547652b82fe1777d0ffda0d23a7d11d6aef551cp+0
1515
#endif
1616
#ifndef M_INV_LOG10
1717
# define M_INV_LOG10 0x1.bcb7b1526e50ep-2
1818
#endif
19+
#ifndef M_INV_LOG10l
20+
# define M_INV_LOG10l 0x1.bcb7b1526e50e32a6ab7555f5a67b8647dc68c049p-2l
21+
#endif
1922
#ifndef M_LOG2
2023
# define M_LOG2 0x1.62e42fefa39efp-1
2124
#endif
@@ -211,3 +214,9 @@ arm_math_log10p1 (double x)
211214
{
212215
return log1p (x) * M_INV_LOG10;
213216
}
217+
218+
long double
219+
arm_math_log10p1l (long double x)
220+
{
221+
return log1pl (x) * M_INV_LOG10l;
222+
}

math/test/mathbench_funcs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ VND (_ZGVnN2v_exp2m1, -10.0, 10.0)
8181
VNF (_ZGVnN4v_log2p1f, -0.9, 10)
8282
VND (_ZGVnN2v_log2p1, -0.9, 10)
8383
VNF (_ZGVnN4v_log10p1f, -0.9, 10)
84+
VND (_ZGVnN2v_log10p1, -0.9, 10)
8485
VNF (_ZGVnN4v_sinpif, -0.9, 0.9)
8586
VND (_ZGVnN2v_sinpi, -0.9, 0.9)
8687
VNF (_ZGVnN4v_tanpif, -0.9, 0.9)

math/test/ulp_funcs.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,9 @@ SVF (_ZGVsMxvl8_modf_int, sv_modf_int, modfl_int, modf_mpfr_int, 1, 0, d1, 0)
8989
F (_ZGVnN4v_exp2m1f, Z_exp2m1f, arm_math_exp2m1, mpfr_exp2m1, 1, 1, f1, 0)
9090
F (_ZGVnN2v_exp2m1, Z_exp2m1, arm_math_exp2m1l, mpfr_exp2m1, 1, 0, d1, 0)
9191
F (_ZGVnN4v_log2p1f, Z_log2p1f, arm_math_log2p1, mpfr_log2p1, 1, 1, f1, 0)
92-
F (_ZGVnN2v_log2p1, Z_log2p1, arm_math_log2p1l, mpfr_log2p1, 1, 0, d1, 0)
92+
F (_ZGVnN2v_log2p1, Z_log2p1, arm_math_log2p1l, mpfr_log2p1, 1, 0, d1, 0)
9393
F (_ZGVnN4v_log10p1f, Z_log10p1f, arm_math_log10p1, mpfr_log10p1, 1, 1, f1, 0)
94+
F (_ZGVnN2v_log10p1, Z_log10p1, arm_math_log10p1l, mpfr_log10p1, 1, 0, d1, 0)
9495
F (_ZGVnN4v_sinpif, Z_sinpif, arm_math_sinpi, mpfr_sinpi, 1, 1, f1, 0)
9596
F (_ZGVnN2v_sinpi, Z_sinpi, arm_math_sinpil, mpfr_sinpi, 1, 0, d1, 0)
9697
F (_ZGVnN4v_tanpif, Z_tanpif, arm_math_tanpi, mpfr_tanpi, 1, 1, f1, 0)

math/test/ulp_wrappers.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ ZVND1_WRAP (exp2m1)
247247
ZVNF1_WRAP (log2p1)
248248
ZVND1_WRAP (log2p1)
249249
ZVNF1_WRAP (log10p1)
250+
ZVND1_WRAP (log10p1)
250251
ZVNF1_WRAP (sinpi)
251252
ZVND1_WRAP (sinpi)
252253
ZVNF1_WRAP (tanpi)

math/tools/log10p1.sollya

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// polynomial for approximating log10(1+x) in double precision
2+
//
3+
// Copyright (c) 2025, Arm Limited.
4+
// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
5+
6+
deg = 20;
7+
8+
a = sqrt(2)/2-1;
9+
b = sqrt(2)-1;
10+
11+
f = proc(y) {
12+
return log10(1+y);
13+
};
14+
15+
poly = fpminimax(f(x), deg, [|double ...|], [a;b]);
16+
17+
print("coeffs:");
18+
display = hexadecimal;
19+
for i from 0 to deg do coeff(poly,i);
20+
print("rel error:", dirtyinfnorm(1-poly(x)/f(x), [a;b], 30));
21+
print("in [",a,b,"]");

0 commit comments

Comments
 (0)