Skip to content

Commit 3c26d8b

Browse files
committed
Merge branch 'asahilina-main' into devel
* Refactoring and optimization of Mid/Side matrix code for AArch64 and ARM32
2 parents (25bf8d2 + 7be4dd4), commit 3c26d8b

File tree

4 files changed

+223
-388
lines changed

4 files changed

+223
-388
lines changed

include/private/dsp/arch/aarch64/asimd/msmatrix.h

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -33,14 +33,14 @@ namespace lsp
3333
IF_ARCH_AARCH64(
3434
static const float msmatrix_const[] __lsp_aligned16 =
3535
{
36-
LSP_DSP_VEC8(0.5f)
36+
LSP_DSP_VEC4(0.5f)
3737
};
3838
);
3939

4040
void lr_to_ms(float *m, float *s, const float *l, const float *r, size_t count)
4141
{
4242
ARCH_AARCH64_ASM(
43-
__ASM_EMIT("ldp q24, q25, [%[HALF]]") // v24 = 0.5, v25 = 0.5
43+
__ASM_EMIT("ldr q24, [%[HALF]]") // v24 = 0.5
4444
// x16 blocks
4545
__ASM_EMIT("subs %[count], %[count], #16")
4646
__ASM_EMIT("b.lo 2f")
@@ -58,13 +58,13 @@ namespace lsp
5858
__ASM_EMIT("fsub v22.4s, v2.4s, v6.4s")
5959
__ASM_EMIT("fsub v23.4s, v3.4s, v7.4s")
6060
__ASM_EMIT("fmul v0.4s, v16.4s, v24.4s") // v0 = (l + r)/2
61-
__ASM_EMIT("fmul v1.4s, v17.4s, v25.4s")
61+
__ASM_EMIT("fmul v1.4s, v17.4s, v24.4s")
6262
__ASM_EMIT("fmul v2.4s, v18.4s, v24.4s")
63-
__ASM_EMIT("fmul v3.4s, v19.4s, v25.4s")
63+
__ASM_EMIT("fmul v3.4s, v19.4s, v24.4s")
6464
__ASM_EMIT("fmul v4.4s, v20.4s, v24.4s") // v4 = (l - r)/2
65-
__ASM_EMIT("fmul v5.4s, v21.4s, v25.4s")
65+
__ASM_EMIT("fmul v5.4s, v21.4s, v24.4s")
6666
__ASM_EMIT("fmul v6.4s, v22.4s, v24.4s")
67-
__ASM_EMIT("fmul v7.4s, v23.4s, v25.4s")
67+
__ASM_EMIT("fmul v7.4s, v23.4s, v24.4s")
6868
__ASM_EMIT("stp q0, q1, [%[m], #0x00]")
6969
__ASM_EMIT("stp q2, q3, [%[m], #0x20]")
7070
__ASM_EMIT("stp q4, q5, [%[s], #0x00]")
@@ -86,9 +86,9 @@ namespace lsp
8686
__ASM_EMIT("fsub v20.4s, v0.4s, v4.4s") // v20 = l - r
8787
__ASM_EMIT("fsub v21.4s, v1.4s, v5.4s")
8888
__ASM_EMIT("fmul v0.4s, v16.4s, v24.4s") // v0 = (l + r)/2
89-
__ASM_EMIT("fmul v1.4s, v17.4s, v25.4s")
89+
__ASM_EMIT("fmul v1.4s, v17.4s, v24.4s")
9090
__ASM_EMIT("fmul v4.4s, v20.4s, v24.4s") // v4 = (l - r)/2
91-
__ASM_EMIT("fmul v5.4s, v21.4s, v25.4s")
91+
__ASM_EMIT("fmul v5.4s, v21.4s, v24.4s")
9292
__ASM_EMIT("stp q0, q1, [%[m], #0x00]")
9393
__ASM_EMIT("stp q4, q5, [%[s], #0x00]")
9494
__ASM_EMIT("sub %[count], %[count], #8")
@@ -142,7 +142,7 @@ namespace lsp
142142
"v4", "v5", "v6", "v7",
143143
"v16", "v17", "v18", "v19",
144144
"v20", "v21", "v22", "v23",
145-
"v24", "v25"
145+
"v24"
146146
);
147147
}
148148

@@ -238,7 +238,7 @@ namespace lsp
238238
}
239239

240240
#define LR_TO_PART(OP) \
241-
__ASM_EMIT("ldp q24, q25, [%[HALF]]") /* v24 = 0.5, v25 = 0.5 */ \
241+
__ASM_EMIT("ldr q24, [%[HALF]]") /* v24 = 0.5 */ \
242242
/* x16 blocks */ \
243243
__ASM_EMIT("subs %[count], %[count], #16") \
244244
__ASM_EMIT("b.lo 2f") \
@@ -252,9 +252,9 @@ namespace lsp
252252
__ASM_EMIT(OP " v18.4s, v2.4s, v6.4s") \
253253
__ASM_EMIT(OP " v19.4s, v3.4s, v7.4s") \
254254
__ASM_EMIT("fmul v0.4s, v16.4s, v24.4s") /* v0 = (l op r)/2 */ \
255-
__ASM_EMIT("fmul v1.4s, v17.4s, v25.4s") \
255+
__ASM_EMIT("fmul v1.4s, v17.4s, v24.4s") \
256256
__ASM_EMIT("fmul v2.4s, v18.4s, v24.4s") \
257-
__ASM_EMIT("fmul v3.4s, v19.4s, v25.4s") \
257+
__ASM_EMIT("fmul v3.4s, v19.4s, v24.4s") \
258258
__ASM_EMIT("stp q0, q1, [%[dst], #0x00]") \
259259
__ASM_EMIT("stp q2, q3, [%[dst], #0x20]") \
260260
__ASM_EMIT("subs %[count], %[count], #16") \
@@ -271,7 +271,7 @@ namespace lsp
271271
__ASM_EMIT(OP " v16.4s, v0.4s, v4.4s") /* v16 = l op r */ \
272272
__ASM_EMIT(OP " v17.4s, v1.4s, v5.4s") \
273273
__ASM_EMIT("fmul v0.4s, v16.4s, v24.4s") /* v0 = (l op r)/2 */ \
274-
__ASM_EMIT("fmul v1.4s, v17.4s, v25.4s") \
274+
__ASM_EMIT("fmul v1.4s, v17.4s, v24.4s") \
275275
__ASM_EMIT("stp q0, q1, [%[dst], #0x00]") \
276276
__ASM_EMIT("sub %[count], %[count], #8") \
277277
__ASM_EMIT("add %[l], %[l], #0x20") \
@@ -318,7 +318,7 @@ namespace lsp
318318
"v0", "v1", "v2", "v3",
319319
"v4", "v5", "v6", "v7",
320320
"v16", "v17", "v18", "v19",
321-
"v24", "v25"
321+
"v24"
322322
);
323323
}
324324

@@ -333,7 +333,7 @@ namespace lsp
333333
"v0", "v1", "v2", "v3",
334334
"v4", "v5", "v6", "v7",
335335
"v16", "v17", "v18", "v19",
336-
"v24", "v25"
336+
"v24"
337337
);
338338
}
339339

0 commit comments

Comments
 (0)