@@ -33,7 +33,7 @@ namespace lsp
3333 namespace avx512
3434 {
3535 IF_ARCH_X86 (
36- static const uint32_t lanczos_const[] __lsp_aligned32 =
36+ static const uint32_t lanczos_const[] __lsp_aligned64 =
3737 {
3838 LSP_DSP_VEC16 (0x7fffffff ), // +0x00: Mask for fabsf
3939 LSP_DSP_VEC16 (0x358637bd ), // +0x40: Sinc threshold = 1e-6
@@ -219,10 +219,10 @@ namespace lsp
219219 __ASM_EMIT (" vmulps %%zmm4, %%zmm0, %%zmm0" ) /* zmm0 = sinf(x1)*sinf(x2) */ \
220220 __ASM_EMIT (" vandps 0x00 + %[LC], %%zmm12, %%zmm12" ) /* zmm12 = fabsf(x1) */ \
221221 __ASM_EMIT (" vdivps %%zmm13, %%zmm0, %%zmm0" ) /* zmm0 = F = (sinf(x1)*sinf(x2)) / (x1 * x2) */ \
222- __ASM_EMIT (" vcmpps $1, 0x40 + %[LC], %%zmm1 , %%k1" ) /* k1 = [ fabsf(x ) < 1e-6 ] */ \
223- __ASM_EMIT (" vcmpps $5, %%zmm10, %%zmm1, %%k2" ) /* k2 = [ fabsf(x ) >= t ] */ \
224- __ASM_EMIT (" vmovaps 0x80 + %[LC], %%zmm0 %{%%k1%}" ) /* zmm0 = [ fabsf(x ) >= 1e-6 ] ? f : 1.0 */ \
225- __ASM_EMIT (" vxorps %%zmm0, %%zmm0, %%zmm0 %{%%k2%}" ) /* zmm0 = [ fabsf(x ) < t ] ? ([ fabsf(x ) >= 1e-6 ] ? f : 1.0) : 0.0 */
222+ __ASM_EMIT (" vcmpps $1, 0x40 + %[LC], %%zmm12 , %%k1" ) /* k1 = [ fabsf(x1 ) < 1e-6 ] */ \
223+ __ASM_EMIT (" vcmpps $5, %%zmm10, %%zmm1, %%k2" ) /* k2 = [ fabsf(x1 ) >= t ] */ \
224+ __ASM_EMIT (" vmovaps 0x80 + %[LC], %%zmm0 %{%%k1%}" ) /* zmm0 = [ fabsf(x1 ) >= 1e-6 ] ? f : 1.0 */ \
225+ __ASM_EMIT (" vxorps %%zmm0, %%zmm0, %%zmm0 %{%%k2%}" ) /* zmm0 = [ fabsf(x1 ) < t ] ? ([ fabsf(x1 ) >= 1e-6 ] ? f : 1.0) : 0.0 */
226226
227227 #define LANCZOS_GEN_X64_FUNC_X8 \
228228 /* ymm8 = k */ \
0 commit comments