@@ -33,14 +33,14 @@ namespace lsp
33
33
// Constant float table built from the value 0.5f, declared with the
// __lsp_aligned16 attribute macro and emitted only through IF_ARCH_AARCH64.
// This change shrinks the initializer from LSP_DSP_VEC8 to LSP_DSP_VEC4: the
// assembly in this file now reads a single q register (`ldr q24`) where it
// previously read a register pair (`ldp q24, q25`), so half as much constant
// data is fetched.
// NOTE(review): presumably LSP_DSP_VECn(x) expands to n copies of x and the
// %[HALF] asm operand is bound to this array -- neither the macro definition
// nor the operand list is visible here; confirm before relying on it.
IF_ARCH_AARCH64 (
34
34
static const float msmatrix_const[] __lsp_aligned16 =
35
35
{
36
- LSP_DSP_VEC8 (0 .5f )
36
+ LSP_DSP_VEC4 (0 .5f )
37
37
};
38
38
);
39
39
40
40
void lr_to_ms (float *m, float *s, const float *l, const float *r, size_t count)
41
41
{
42
42
ARCH_AARCH64_ASM (
43
- __ASM_EMIT (" ldp q24, q25, [%[HALF]]" ) // v24 = 0.5, v25 = 0.5
43
+ __ASM_EMIT (" ldr q24, [%[HALF]]" ) // v24 = 0.5
44
44
// x16 blocks
45
45
__ASM_EMIT (" subs %[count], %[count], #16" )
46
46
__ASM_EMIT (" b.lo 2f" )
@@ -58,13 +58,13 @@ namespace lsp
58
58
__ASM_EMIT (" fsub v22.4s, v2.4s, v6.4s" )
59
59
__ASM_EMIT (" fsub v23.4s, v3.4s, v7.4s" )
60
60
__ASM_EMIT (" fmul v0.4s, v16.4s, v24.4s" ) // v0 = (l + r)/2
61
- __ASM_EMIT (" fmul v1.4s, v17.4s, v25 .4s" )
61
+ __ASM_EMIT (" fmul v1.4s, v17.4s, v24 .4s" )
62
62
__ASM_EMIT (" fmul v2.4s, v18.4s, v24.4s" )
63
- __ASM_EMIT (" fmul v3.4s, v19.4s, v25 .4s" )
63
+ __ASM_EMIT (" fmul v3.4s, v19.4s, v24 .4s" )
64
64
__ASM_EMIT (" fmul v4.4s, v20.4s, v24.4s" ) // v4 = (l - r)/2
65
- __ASM_EMIT (" fmul v5.4s, v21.4s, v25 .4s" )
65
+ __ASM_EMIT (" fmul v5.4s, v21.4s, v24 .4s" )
66
66
__ASM_EMIT (" fmul v6.4s, v22.4s, v24.4s" )
67
- __ASM_EMIT (" fmul v7.4s, v23.4s, v25 .4s" )
67
+ __ASM_EMIT (" fmul v7.4s, v23.4s, v24 .4s" )
68
68
__ASM_EMIT (" stp q0, q1, [%[m], #0x00]" )
69
69
__ASM_EMIT (" stp q2, q3, [%[m], #0x20]" )
70
70
__ASM_EMIT (" stp q4, q5, [%[s], #0x00]" )
@@ -86,9 +86,9 @@ namespace lsp
86
86
__ASM_EMIT (" fsub v20.4s, v0.4s, v4.4s" ) // v20 = l - r
87
87
__ASM_EMIT (" fsub v21.4s, v1.4s, v5.4s" )
88
88
__ASM_EMIT (" fmul v0.4s, v16.4s, v24.4s" ) // v0 = (l + r)/2
89
- __ASM_EMIT (" fmul v1.4s, v17.4s, v25 .4s" )
89
+ __ASM_EMIT (" fmul v1.4s, v17.4s, v24 .4s" )
90
90
__ASM_EMIT (" fmul v4.4s, v20.4s, v24.4s" ) // v4 = (l - r)/2
91
- __ASM_EMIT (" fmul v5.4s, v21.4s, v25 .4s" )
91
+ __ASM_EMIT (" fmul v5.4s, v21.4s, v24 .4s" )
92
92
__ASM_EMIT (" stp q0, q1, [%[m], #0x00]" )
93
93
__ASM_EMIT (" stp q4, q5, [%[s], #0x00]" )
94
94
__ASM_EMIT (" sub %[count], %[count], #8" )
@@ -142,7 +142,7 @@ namespace lsp
142
142
" v4" , " v5" , " v6" , " v7" ,
143
143
" v16" , " v17" , " v18" , " v19" ,
144
144
" v20" , " v21" , " v22" , " v23" ,
145
- " v24" , " v25 "
145
+ " v24"
146
146
);
147
147
}
148
148
@@ -238,7 +238,7 @@ namespace lsp
238
238
}
239
239
240
240
#define LR_TO_PART (OP ) \
241
- __ASM_EMIT (" ldp q24, q25, [%[HALF]]" ) /* v24 = 0.5, v25 = 0.5 */ \
241
+ __ASM_EMIT (" ldr q24, [%[HALF]]" ) /* v24 = 0.5 */ \
242
242
/* x16 blocks */ \
243
243
__ASM_EMIT (" subs %[count], %[count], #16" ) \
244
244
__ASM_EMIT (" b.lo 2f" ) \
@@ -252,9 +252,9 @@ namespace lsp
252
252
__ASM_EMIT (OP " v18.4s, v2.4s, v6.4s" ) \
253
253
__ASM_EMIT (OP " v19.4s, v3.4s, v7.4s" ) \
254
254
__ASM_EMIT (" fmul v0.4s, v16.4s, v24.4s" ) /* v0 = (l op r)/2 */ \
255
- __ASM_EMIT (" fmul v1.4s, v17.4s, v25 .4s" ) \
255
+ __ASM_EMIT (" fmul v1.4s, v17.4s, v24 .4s" ) \
256
256
__ASM_EMIT (" fmul v2.4s, v18.4s, v24.4s" ) \
257
- __ASM_EMIT (" fmul v3.4s, v19.4s, v25 .4s" ) \
257
+ __ASM_EMIT (" fmul v3.4s, v19.4s, v24 .4s" ) \
258
258
__ASM_EMIT (" stp q0, q1, [%[dst], #0x00]" ) \
259
259
__ASM_EMIT (" stp q2, q3, [%[dst], #0x20]" ) \
260
260
__ASM_EMIT (" subs %[count], %[count], #16" ) \
@@ -271,7 +271,7 @@ namespace lsp
271
271
__ASM_EMIT (OP " v16.4s, v0.4s, v4.4s" ) /* v16 = l op r */ \
272
272
__ASM_EMIT (OP " v17.4s, v1.4s, v5.4s" ) \
273
273
__ASM_EMIT (" fmul v0.4s, v16.4s, v24.4s" ) /* v0 = (l op r)/2 */ \
274
- __ASM_EMIT (" fmul v1.4s, v17.4s, v25 .4s" ) \
274
+ __ASM_EMIT (" fmul v1.4s, v17.4s, v24 .4s" ) \
275
275
__ASM_EMIT (" stp q0, q1, [%[dst], #0x00]" ) \
276
276
__ASM_EMIT (" sub %[count], %[count], #8" ) \
277
277
__ASM_EMIT (" add %[l], %[l], #0x20" ) \
@@ -318,7 +318,7 @@ namespace lsp
318
318
" v0" , " v1" , " v2" , " v3" ,
319
319
" v4" , " v5" , " v6" , " v7" ,
320
320
" v16" , " v17" , " v18" , " v19" ,
321
- " v24" , " v25 "
321
+ " v24"
322
322
);
323
323
}
324
324
@@ -333,7 +333,7 @@ namespace lsp
333
333
" v0" , " v1" , " v2" , " v3" ,
334
334
" v4" , " v5" , " v6" , " v7" ,
335
335
" v16" , " v17" , " v18" , " v19" ,
336
- " v24" , " v25 "
336
+ " v24"
337
337
);
338
338
}
339
339
0 commit comments