Skip to content

Commit 8c4a97b

Browse files
committed
Bugfixes in AVX-512 optimizations of lanczos function
1 parent d1479e5 commit 8c4a97b

File tree

1 file changed

+5
-5
lines changed
  • include/private/dsp/arch/x86/avx512/pmath

1 file changed

+5
-5
lines changed

include/private/dsp/arch/x86/avx512/pmath/lanczos.h

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ namespace lsp
3333
namespace avx512
3434
{
3535
IF_ARCH_X86(
36-
static const uint32_t lanczos_const[] __lsp_aligned32 =
36+
static const uint32_t lanczos_const[] __lsp_aligned64 =
3737
{
3838
LSP_DSP_VEC16(0x7fffffff), // +0x00: Mask for fabsf
3939
LSP_DSP_VEC16(0x358637bd), // +0x40: Sinc threshold = 1e-6
@@ -219,10 +219,10 @@ namespace lsp
219219
__ASM_EMIT("vmulps %%zmm4, %%zmm0, %%zmm0") /* zmm0 = sinf(x1)*sinf(x2) */ \
220220
__ASM_EMIT("vandps 0x00 + %[LC], %%zmm12, %%zmm12") /* zmm12 = fabsf(x1) */ \
221221
__ASM_EMIT("vdivps %%zmm13, %%zmm0, %%zmm0") /* zmm0 = F = (sinf(x1)*sinf(x2)) / (x1 * x2) */ \
222-
__ASM_EMIT("vcmpps $1, 0x40 + %[LC], %%zmm1, %%k1") /* k1 = [ fabsf(x) < 1e-6 ] */ \
223-
__ASM_EMIT("vcmpps $5, %%zmm10, %%zmm1, %%k2") /* k2 = [ fabsf(x) >= t ] */ \
224-
__ASM_EMIT("vmovaps 0x80 + %[LC], %%zmm0 %{%%k1%}") /* zmm0 = [ fabsf(x) >= 1e-6 ] ? f : 1.0 */ \
225-
__ASM_EMIT("vxorps %%zmm0, %%zmm0, %%zmm0 %{%%k2%}") /* zmm0 = [ fabsf(x) < t ] ? ([ fabsf(x) >= 1e-6 ] ? f : 1.0) : 0.0 */
222+
__ASM_EMIT("vcmpps $1, 0x40 + %[LC], %%zmm12, %%k1") /* k1 = [ fabsf(x1) < 1e-6 ] */ \
223+
__ASM_EMIT("vcmpps $5, %%zmm10, %%zmm1, %%k2") /* k2 = [ fabsf(x1) >= t ] */ \
224+
__ASM_EMIT("vmovaps 0x80 + %[LC], %%zmm0 %{%%k1%}") /* zmm0 = [ fabsf(x1) >= 1e-6 ] ? f : 1.0 */ \
225+
__ASM_EMIT("vxorps %%zmm0, %%zmm0, %%zmm0 %{%%k2%}") /* zmm0 = [ fabsf(x1) < t ] ? ([ fabsf(x1) >= 1e-6 ] ? f : 1.0) : 0.0 */
226226

227227
#define LANCZOS_GEN_X64_FUNC_X8 \
228228
/* ymm8 = k */ \

0 commit comments

Comments
 (0)