Skip to content

Commit becc949

Browse files
committed
math: Add approximate log2() function for float32
1 parent 6bac8f5 commit becc949

File tree

5 files changed

+167
-0
lines changed

5 files changed

+167
-0
lines changed

simdpp/math/log2_approx.h

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/* Copyright (C) 2024 Povilas Kanapickas <povilas@radix.lt>
2+
3+
Distributed under the Boost Software License, Version 1.0.
4+
(See accompanying file LICENSE_1_0.txt or copy at
5+
http://www.boost.org/LICENSE_1_0.txt)
6+
*/
7+
8+
#ifndef LIBSIMDPP_SIMDPP_MATH_LOG2_APPROX_H
9+
#define LIBSIMDPP_SIMDPP_MATH_LOG2_APPROX_H
10+
11+
#include <simdpp/simd.h>
12+
13+
namespace simdpp {
14+
namespace SIMDPP_ARCH_NAMESPACE {
15+
16+
/** Calculates approximate log2(x). The function is optimized for maximum speed.
17+
The absolute error of the result is ... over entire range.
18+
19+
This version of the function requires that the argument is nonzero positive number that
20+
is also not an infinity.
21+
*/
22+
template<unsigned N>
23+
float32<N> log2_approx_positive_finite(const float32<N>& a)
24+
{
25+
uint32<N> exponent_mask = make_uint(0x7f800000);
26+
uint32<N> exponent_for_1 = make_uint(0x3f800000);
27+
28+
// IEEE-754 floating-point numbers are convenient as they store the 2-based exponent as a port
29+
// of their format already. The algorithm below extracts the exponent and then appliyes
30+
// a polynomial to map the [1..2) mantissa to approximate value.
31+
32+
// extract the exponent into float value
33+
auto a_int = bit_cast<uint32<N>>(a);
34+
auto a_exponent_int = shift_r<23>(a_int) & 0xff;
35+
auto res = to_float32(bit_cast<int32<N>>(a_exponent_int) - 128);
36+
37+
// extract the mantissa to the range [1..2)
38+
auto mantissa = bit_cast<float32<N>>(bit_select(exponent_for_1, a_int, exponent_mask));
39+
40+
auto mantissa_res = -0.34484362f * mantissa + 2.02466192f;
41+
mantissa_res = mantissa_res * mantissa - 0.67487591f;
42+
res = res + mantissa_res;
43+
44+
return res;
45+
}
46+
47+
/** Calculates approximate log2(x). The function is optimized for maximum speed.
48+
The absolute error of the result is ... over entire range.
49+
50+
This version of the function handles full range of inputs including special cases correctly.
51+
*/
52+
template<unsigned N>
53+
float32<N> log2_approx(const float32<N>& a)
54+
{
55+
uint32<N> exponent_mask = make_uint(0x7f800000);
56+
uint32<N> exponent_for_1 = make_uint(0x3f800000);
57+
float32<N> neg_infinity = make_float(-std::numeric_limits<float>::infinity());
58+
59+
// IEEE-754 floating-point numbers are convenient as they store the 2-based exponent as a port
60+
// of their format already. The algorithm below extracts the exponent and then appliyes
61+
// a polynomial to map the [1..2) mantissa to approximate value.
62+
63+
auto nan_mask = a < 0;
64+
65+
auto zero_mask = a == 0;
66+
auto finite_mask = isfinite(a);
67+
68+
// extract the exponent into float value
69+
auto a_int = bit_cast<uint32<N>>(a);
70+
auto a_exponent_int = shift_r<23>(a_int) & 0xff;
71+
auto res = to_float32(bit_cast<int32<N>>(a_exponent_int) - 128);
72+
73+
// extract the mantissa to the range [1..2)
74+
auto mantissa = bit_cast<float32<N>>(bit_select(exponent_for_1, a_int, exponent_mask));
75+
76+
auto mantissa_res = -0.34484362f * mantissa + 2.02466192f;
77+
mantissa_res = mantissa_res * mantissa - 0.67487591f;
78+
res = res + mantissa_res;
79+
80+
// put back infinity if the argument was infinity
81+
res = blend(res, a, finite_mask);
82+
// put negative infinity if argument was zero
83+
res = blend(neg_infinity, res, zero_mask);
84+
// put NaN if argument was negative
85+
res = res | nan_mask; // 0xffffffff mask is convenient because it's NaN itself
86+
87+
return res;
88+
}
89+
90+
} // namespace simdpp
91+
} // namespace SIMDPP_ARCH_NAMESPACE
92+
93+
#endif // LIBSIMDPP_SIMDPP_ALGORITHM_BITONIC_SORT

test/math_accuracy/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ set(SOURCES
1010

1111
set(ARCH_SOURCES
1212
checks.cc
13+
log_approx.cc
1314
)
1415

1516
set(ARCH_GEN_SOURCES "")

test/math_accuracy/checks.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ namespace SIMDPP_ARCH_NAMESPACE {
1111

1212
void main_check_accuracy(const std::string& check_name)
1313
{
14+
if (check_name == "" || check_name == "log2_approx") {
15+
check_log2_approx_accuracy();
16+
check_log2_approx_positive_finite_accuracy();
17+
}
1418
}
1519

1620
} // namespace SIMDPP_ARCH_NAMESPACE

test/math_accuracy/checks.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
namespace SIMDPP_ARCH_NAMESPACE {
1515

1616
void main_check_accuracy(const std::string& check_name);
17+
void check_log2_approx_accuracy();
18+
void check_log2_approx_positive_finite_accuracy();
1719

1820
} // namespace SIMDPP_ARCH_NAMESPACE
1921

test/math_accuracy/log_approx.cc

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/* Copyright (C) 2011-2017 Povilas Kanapickas <povilas@radix.lt>
2+
3+
Distributed under the Boost Software License, Version 1.0.
4+
(See accompanying file LICENSE_1_0.txt or copy at
5+
http://www.boost.org/LICENSE_1_0.txt)
6+
*/
7+
8+
#include <simdpp/simd.h>
9+
#include <simdpp/math/log2_approx.h>
10+
#include "accuracy_checker.h"
11+
12+
namespace SIMDPP_ARCH_NAMESPACE {
13+
14+
void check_log2_approx_positive_finite_accuracy()
15+
{
16+
auto expected_func = [](const float* src, float* dst, std::size_t size) {
17+
#pragma omp parallel for
18+
for (std::size_t i = 0; i < size; ++i) {
19+
*dst++ = std::log2(*src++);
20+
}
21+
};
22+
23+
auto check_func = [](const float* src, float* dst, std::size_t size) {
24+
#pragma omp parallel for
25+
for (std::size_t i = 0; i < size; i += simdpp::float32v::length) {
26+
simdpp::float32v v = simdpp::load(src);
27+
v = simdpp::log2_approx_positive_finite(v);
28+
simdpp::store(dst, v);
29+
30+
src += simdpp::float32v::length;
31+
dst += simdpp::float32v::length;
32+
}
33+
};
34+
35+
AccuracyChecker<float> checker;
36+
// note that min() selects first normal value.
37+
checker.set_bounds({{std::numeric_limits<float>::min(), std::numeric_limits<float>::max()}});
38+
checker.run_check(expected_func, check_func);
39+
checker.describe("log2_approx_positive_finite(float32)");
40+
}
41+
42+
void check_log2_approx_accuracy()
43+
{
44+
auto expected_func = [](const float* src, float* dst, std::size_t size) {
45+
for (std::size_t i = 0; i < size; ++i) {
46+
*dst++ = std::log2(*src++);
47+
}
48+
};
49+
50+
auto check_func = [](const float* src, float* dst, std::size_t size) {
51+
for (std::size_t i = 0; i < size; i += simdpp::float32v::length) {
52+
simdpp::float32v v = simdpp::load(src);
53+
v = simdpp::log2_approx(v);
54+
simdpp::store(dst, v);
55+
56+
src += simdpp::float32v::length;
57+
dst += simdpp::float32v::length;
58+
}
59+
};
60+
61+
AccuracyChecker<float> checker;
62+
checker.set_bounds({{std::numeric_limits<float>::min(), std::numeric_limits<float>::max()}});
63+
checker.run_check(expected_func, check_func);
64+
checker.describe("log2_approx(float32)");
65+
}
66+
67+
} // namespace SIMDPP_ARCH_NAMESPACE

0 commit comments

Comments
 (0)