p12tic
diff --git a/‎simdpp/math/log2_approx.h‎
Lines changed: 93 additions & 0 deletions b/‎simdpp/math/log2_approx.h‎
Lines changed: 93 additions & 0 deletions
diff --git a/‎test/math_accuracy/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎test/math_accuracy/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎test/math_accuracy/checks.cc‎
Lines changed: 4 additions & 0 deletions b/‎test/math_accuracy/checks.cc‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎test/math_accuracy/checks.h‎
Lines changed: 2 additions & 0 deletions b/‎test/math_accuracy/checks.h‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎test/math_accuracy/log_approx.cc‎
Lines changed: 67 additions & 0 deletions b/‎test/math_accuracy/log_approx.cc‎
Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,93 @@
+/* Copyright (C) 2024 Povilas Kanapickas <povilas@radix.lt>
+
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#ifndef LIBSIMDPP_SIMDPP_MATH_LOG2_APPROX_H
+#define LIBSIMDPP_SIMDPP_MATH_LOG2_APPROX_H
+
+#include <limits>
+#include <simdpp/simd.h>
+
+namespace simdpp {
+namespace SIMDPP_ARCH_NAMESPACE {
+
+/** Fast, low-precision base-2 logarithm of every lane of @a a.
+
+ Precondition: every lane holds a strictly positive, finite value. Zero, negative values,
+ infinities and NaN are not detected here; the result for such lanes is not meaningful.
+
+ NOTE(review): judging from the polynomial coefficients the absolute error is roughly 0.005
+ for normal inputs - confirm with the accuracy checker. Denormal inputs have a zero exponent
+ field and no implicit leading one, so they presumably fall outside that bound.
+
+ @param a input vector
+ @return approximate log2 of each lane of @a a
+*/
+template<unsigned N>
+float32<N> log2_approx_positive_finite(const float32<N>& a)
+{
+ // An IEEE-754 float already stores its power-of-two exponent as part of its encoding, so
+ // log2 splits into "exponent field" + "polynomial of the mantissa in [1..2)".
+ const auto bits = bit_cast<uint32<N>>(a);
+
+ // Overwrite the exponent field with the exponent of 1.0f while keeping the remaining bits;
+ // this maps the input into the range [1..2).
+ const uint32<N> exponent_field = make_uint(0x7f800000);
+ const uint32<N> exponent_of_one = make_uint(0x3f800000);
+ const auto m = bit_cast<float32<N>>(bit_select(exponent_of_one, bits, exponent_field));
+
+ // Quadratic in m, evaluated in Horner form. It evaluates to roughly log2(m) + 1 (about
+ // 1.005 at m = 1), which is why 128 rather than the IEEE bias of 127 is subtracted below.
+ auto poly = -0.34484362f * m + 2.02466192f;
+ poly = poly * m - 0.67487591f;
+
+ // Biased exponent moved to the low byte (the 0xff mask drops the sign bit), then converted
+ // to a float value.
+ const auto biased_exponent = shift_r<23>(bits) & 0xff;
+ const auto exponent = to_float32(bit_cast<int32<N>>(biased_exponent) - 128);
+
+ return exponent + poly;
+}
+
+/** Fast, low-precision base-2 logarithm of every lane of @a a, with special inputs handled.
+
+ Positive finite lanes are computed by log2_approx_positive_finite(). The remaining lanes
+ are patched afterwards:
+ - zero (either sign) yields negative infinity;
+ - negative values, including negative infinity, yield NaN;
+ - other non-finite lanes (positive infinity, and presumably NaN - this relies on isfinite()
+ reporting false for NaN, TODO confirm) are passed through unchanged.
+
+ NOTE(review): denormal inputs are not special-cased; they have a zero exponent field and
+ no implicit leading one, so their result is presumably less accurate - confirm.
+
+ @param a input vector
+ @return approximate log2 of each lane of @a a
+*/
+template<unsigned N>
+float32<N> log2_approx(const float32<N>& a)
+{
+ float32<N> neg_infinity = make_float(-std::numeric_limits<float>::infinity());
+
+ // Classify the lanes up front; the masks are applied after the main computation.
+ auto nan_mask = a < 0;
+ auto zero_mask = a == 0;
+ auto finite_mask = isfinite(a);
+
+ // The core consists only of bit manipulation and arithmetic, so lanes that violate its
+ // precondition merely produce garbage that is replaced below.
+ float32<N> res = log2_approx_positive_finite(a);
+
+ // put back the argument itself where it was not finite
+ res = blend(res, a, finite_mask);
+ // put negative infinity if argument was zero
+ res = blend(neg_infinity, res, zero_mask);
+ // put NaN if argument was negative
+ res = res | nan_mask; // 0xffffffff mask is convenient because it's NaN itself
+
+ return res;
+}
+
+} // namespace SIMDPP_ARCH_NAMESPACE
+} // namespace simdpp
+
+#endif // LIBSIMDPP_SIMDPP_MATH_LOG2_APPROX_H
@@ -10,6 +10,7 @@ set(SOURCES
 
 set(ARCH_SOURCES
  checks.cc
+ log_approx.cc
 )
 
 set(ARCH_GEN_SOURCES "")
 
@@ -11,6 +11,10 @@ namespace SIMDPP_ARCH_NAMESPACE {
 
 void main_check_accuracy(const std::string& check_name)
 {
+ if (check_name == "" || check_name == "log2_approx") {
+ check_log2_approx_accuracy();
+ check_log2_approx_positive_finite_accuracy();
+ }
 }
 
 } // namespace SIMDPP_ARCH_NAMESPACE
 
@@ -14,6 +14,8 @@
 namespace SIMDPP_ARCH_NAMESPACE {
 
 void main_check_accuracy(const std::string& check_name);
+// Accuracy checks for simdpp::log2_approx() and simdpp::log2_approx_positive_finite().
+// Both are defined in log_approx.cc and invoked from main_check_accuracy().
+void check_log2_approx_accuracy();
+void check_log2_approx_positive_finite_accuracy();
 
 } // namespace SIMDPP_ARCH_NAMESPACE
 
 
@@ -0,0 +1,67 @@
+/* Copyright (C) 2011-2017 Povilas Kanapickas <povilas@radix.lt>
+
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#include <simdpp/simd.h>
+#include <simdpp/math/log2_approx.h>
+#include "accuracy_checker.h"
+
+namespace SIMDPP_ARCH_NAMESPACE {
+
+/** Runs the accuracy check of simdpp::log2_approx_positive_finite() against std::log2() over
+ the positive normal float range and reports it under the name
+ "log2_approx_positive_finite(float32)".
+*/
+void check_log2_approx_positive_finite_accuracy()
+{
+ // Reference results via scalar std::log2. Elements are addressed through the loop index so
+ // that every iteration is independent: advancing the shared src/dst pointers inside an
+ // OpenMP parallel loop would be a data race and would pair up the wrong elements.
+ auto expected_func = [](const float* src, float* dst, std::size_t size) {
+ #pragma omp parallel for
+ for (std::size_t i = 0; i < size; ++i) {
+ dst[i] = std::log2(src[i]);
+ }
+ };
+
+ // Results of the function under test, one full vector per iteration.
+ // NOTE(review): assumes size is a multiple of float32v::length and that the buffers satisfy
+ // the alignment simdpp::load/store expect - confirm against AccuracyChecker.
+ auto check_func = [](const float* src, float* dst, std::size_t size) {
+ #pragma omp parallel for
+ for (std::size_t i = 0; i < size; i += simdpp::float32v::length) {
+ simdpp::float32v v = simdpp::load(src + i);
+ v = simdpp::log2_approx_positive_finite(v);
+ simdpp::store(dst + i, v);
+ }
+ };
+
+ AccuracyChecker<float> checker;
+ // note that min() selects first normal value.
+ checker.set_bounds({{std::numeric_limits<float>::min(), std::numeric_limits<float>::max()}});
+ checker.run_check(expected_func, check_func);
+ checker.describe("log2_approx_positive_finite(float32)");
+}
+
+/** Runs the accuracy check of simdpp::log2_approx() against std::log2() and reports it under
+ the name "log2_approx(float32)".
+
+ NOTE(review): the bounds below cover only the positive normal range, so the zero, negative
+ and infinity handling of log2_approx() is presumably not exercised here - confirm against
+ AccuracyChecker.
+*/
+void check_log2_approx_accuracy()
+{
+ // Reference results via scalar std::log2.
+ auto reference = [](const float* src, float* dst, std::size_t size) {
+ for (std::size_t idx = 0; idx < size; ++idx) {
+ dst[idx] = std::log2(src[idx]);
+ }
+ };
+
+ // Results of the function under test, one full vector per step.
+ auto candidate = [](const float* src, float* dst, std::size_t size) {
+ const std::size_t step = simdpp::float32v::length;
+ for (std::size_t offset = 0; offset < size; offset += step) {
+ simdpp::float32v lanes = simdpp::load(src + offset);
+ lanes = simdpp::log2_approx(lanes);
+ simdpp::store(dst + offset, lanes);
+ }
+ };
+
+ AccuracyChecker<float> accuracy;
+ // min() is the smallest positive normal float
+ accuracy.set_bounds({{std::numeric_limits<float>::min(), std::numeric_limits<float>::max()}});
+ accuracy.run_check(reference, candidate);
+ accuracy.describe("log2_approx(float32)");
+}
+
+} // namespace SIMDPP_ARCH_NAMESPACE
Original file line number	Diff line number	Diff line change
`@@ -10,6 +10,7 @@ set(SOURCES`
`10`	`10`
`11`	`11`	`set(ARCH_SOURCES`
`12`	`12`	`checks.cc`
	`13`	`+ log_approx.cc`
`13`	`14`	`)`
`14`	`15`
`15`	`16`	`set(ARCH_GEN_SOURCES "")`
Original file line number	Diff line number	Diff line change
`@@ -11,6 +11,10 @@ namespace SIMDPP_ARCH_NAMESPACE {`
`11`	`11`
`12`	`12`	`void main_check_accuracy(const std::string& check_name)`
`13`	`13`	`{`
	`14`	`+ if (check_name == "" \|\| check_name == "log2_approx") {`
	`15`	`+ check_log2_approx_accuracy();`
	`16`	`+ check_log2_approx_positive_finite_accuracy();`
	`17`	`+ }`
`14`	`18`	`}`
`15`	`19`
`16`	`20`	`} // namespace SIMDPP_ARCH_NAMESPACE`