Skip to content

Commit 25cbeef

Browse files
committed
First matrix operations for AVX
1 parent 06292bb commit 25cbeef

File tree

4 files changed

+182
-10
lines changed

4 files changed

+182
-10
lines changed

include/private/dsp/arch/x86/avx/3dmath.h

Lines changed: 134 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,22 +41,64 @@
4141
__ASM_EMIT("vdivps %" x1 ", %" x0 ", %" x0) /* x0 = x/w y/w z/w w/w */ \
4242
__ASM_EMIT("1000000:")
4343

44+
// Load matrix: emits four vmovups loads that read consecutive 16-byte rows
// (byte offsets 0x00, 0x10, 0x20, 0x30) starting at the address operand.
// vmovups does not require the address to be aligned.
45+
// ptr = name of the asm operand holding the matrix address (expanded as %[ptr])
46+
// x0 = register name string that receives row 0 (offset 0x00)
47+
// x1 = register name string that receives row 1 (offset 0x10)
48+
// x2 = register name string that receives row 2 (offset 0x20)
49+
// x3 = register name string that receives row 3 (offset 0x30)
50+
#define MATRIX_LOAD(ptr, x0, x1, x2, x3) \
51+
__ASM_EMIT("vmovups 0x00(%[" ptr "]), %" x0 ) \
52+
__ASM_EMIT("vmovups 0x10(%[" ptr "]), %" x1 ) \
53+
__ASM_EMIT("vmovups 0x20(%[" ptr "]), %" x2 ) \
54+
__ASM_EMIT("vmovups 0x30(%[" ptr "]), %" x3 )
55+
56+
// Store matrix: emits four vmovups stores that write the given registers to
// consecutive 16-byte rows (byte offsets 0x00, 0x10, 0x20, 0x30) starting at
// the address operand. vmovups does not require the address to be aligned.
57+
// ptr = name of the asm operand holding the matrix address (expanded as %[ptr])
58+
// x0 = register name string written to row 0 (offset 0x00)
59+
// x1 = register name string written to row 1 (offset 0x10)
60+
// x2 = register name string written to row 2 (offset 0x20)
61+
// x3 = register name string written to row 3 (offset 0x30)
62+
#define MATRIX_STORE(ptr, x0, x1, x2, x3) \
63+
__ASM_EMIT("vmovups %" x0 ", 0x00(%[" ptr "])") \
64+
__ASM_EMIT("vmovups %" x1 ", 0x10(%[" ptr "])") \
65+
__ASM_EMIT("vmovups %" x2 ", 0x20(%[" ptr "])") \
66+
__ASM_EMIT("vmovups %" x3 ", 0x30(%[" ptr "])")
67+
68+
// Transpose matrix: transposes four rows of four 32-bit elements in place
// using eight unpack instructions (32-bit interleave, then 64-bit interleave).
// In the per-line comments the input rows are named a = x0, b = x1, c = x2,
// d = x3 and their elements are numbered 1..4.
// The vpunpck* instructions only move bits, so they are usable for float data.
69+
// x0 = row 0 on entry (a1 a2 a3 a4), column 0 on exit (a1 b1 c1 d1)
70+
// x1 = row 1 on entry (b1 b2 b3 b4), column 1 on exit (a2 b2 c2 d2)
71+
// x2 = row 2 on entry (c1 c2 c3 c4), column 2 on exit (a3 b3 c3 d3)
72+
// x3 = row 3 on entry (d1 d2 d3 d4), column 3 on exit (a4 b4 c4 d4)
73+
// x4 = temp, overwritten; must be distinct from x0..x3
74+
#define MAT4_TRANSPOSE(x0, x1, x2, x3, x4) \
75+
__ASM_EMIT("vpunpckhdq %" x3 ", %" x2 ", %" x4) /* x4 = c3 d3 c4 d4 */ \
76+
__ASM_EMIT("vpunpckldq %" x3 ", %" x2 ", %" x2) /* x2 = c1 d1 c2 d2 */ \
77+
__ASM_EMIT("vpunpckhdq %" x1 ", %" x0 ", %" x3) /* x3 = a3 b3 a4 b4 */ \
78+
__ASM_EMIT("vpunpckldq %" x1 ", %" x0 ", %" x0) /* x0 = a1 b1 a2 b2 */ \
79+
__ASM_EMIT("vpunpckhqdq %" x2 ", %" x0 ", %" x1) /* x1 = a2 b2 c2 d2 */ \
80+
__ASM_EMIT("vpunpcklqdq %" x2 ", %" x0 ", %" x0) /* x0 = a1 b1 c1 d1 */ \
81+
__ASM_EMIT("vpunpcklqdq %" x4 ", %" x3 ", %" x2) /* x2 = a3 b3 c3 d3 */ \
82+
__ASM_EMIT("vpunpckhqdq %" x4 ", %" x3 ", %" x3) /* x3 = a4 b4 c4 d4 */
83+
84+
4485
namespace lsp
4586
{
4687
namespace avx
4788
{
4889
using namespace dsp;
4990

5091
IF_ARCH_X86(
51-
static const float IDENTITY[16] __lsp_aligned16 =
92+
static const float IDENTITY[16] __lsp_aligned32 =
5293
{
5394
1.0f, 0.0f, 0.0f, 0.0f,
5495
0.0f, 1.0f, 0.0f, 0.0f,
5596
0.0f, 0.0f, 1.0f, 0.0f,
5697
0.0f, 0.0f, 0.0f, 1.0f
5798
};
99+
static const float ONE[] __lsp_aligned32 = { LSP_DSP_VEC8(1.0f) };
58100

59-
static const uint32_t X_MASK0111[] __lsp_aligned16 = { 0xffffffff, 0xffffffff, 0xffffffff, 0 };
101+
static const uint32_t X_MASK0111[] __lsp_aligned32 = { 0xffffffff, 0xffffffff, 0xffffffff, 0 };
60102
)
61103

62104
void init_point_xyz(point3d_t *p, float x, float y, float z)
@@ -178,6 +220,96 @@ namespace lsp
178220
);
179221
}
180222

223+
// Copies 64 bytes from *src to *dst using two 32-byte ymm loads followed by
// two 32-byte ymm stores. Both loads complete before the first store, and
// vmovups places no alignment requirement on either pointer.
// dst = destination matrix
// src = source matrix
// NOTE(review): the clobber list names %xmm0/%xmm1 while the code writes
// ymm0/ymm1 - presumably the compiler treats these as the same registers; confirm.
void init_matrix3d(matrix3d_t *dst, const matrix3d_t *src)
224+
{
225+
ARCH_X86_ASM
226+
(
227+
__ASM_EMIT("vmovups 0x00(%[s]), %%ymm0")
228+
__ASM_EMIT("vmovups 0x20(%[s]), %%ymm1")
229+
__ASM_EMIT("vmovups %%ymm0, 0x00(%[d])")
230+
__ASM_EMIT("vmovups %%ymm1, 0x20(%[d])")
231+
:
232+
: [s] "r" (src), [d] "r" (dst)
233+
: "memory",
234+
"%xmm0", "%xmm1"
235+
);
236+
}
237+
238+
// Fills the 64 bytes at *m with zero bits: ymm0 is cleared with vxorps and
// then stored to both 32-byte halves (offsets 0x00 and 0x20).
// vmovups places no alignment requirement on m.
// m = matrix to clear
void init_matrix3d_zero(matrix3d_t *m)
239+
{
240+
ARCH_X86_ASM
241+
(
242+
__ASM_EMIT("vxorps %%ymm0, %%ymm0, %%ymm0")
243+
__ASM_EMIT("vmovups %%ymm0, 0x00(%[m])")
244+
/* Store ymm0 again: ymm1 is never initialised or clobbered by this block */
__ASM_EMIT("vmovups %%ymm0, 0x20(%[m])")
245+
:
246+
: [m] "r" (m)
247+
: "memory",
248+
"%xmm0"
249+
);
250+
}
251+
252+
// Fills the 64 bytes at *m from the ONE constant: 32 bytes of ONE are loaded
// into ymm0 and stored to both halves of the matrix (offsets 0x00 and 0x20).
// The load uses vmovaps, which requires ONE to be 32-byte aligned; the stores
// use vmovups and place no alignment requirement on m.
// m = matrix to fill
// NOTE(review): presumably ONE holds eight 1.0f values (LSP_DSP_VEC8(1.0f)),
// so every element becomes 1.0f - confirm against the macro definition.
void init_matrix3d_one(matrix3d_t *m)
253+
{
254+
ARCH_X86_ASM
255+
(
256+
__ASM_EMIT("vmovaps %[one], %%ymm0")
257+
__ASM_EMIT("vmovups %%ymm0, 0x00(%[m])")
258+
__ASM_EMIT("vmovups %%ymm0, 0x20(%[m])")
259+
:
260+
: [m] "r" (m),
261+
[one] "m" (ONE)
262+
: "memory",
263+
"%xmm0"
264+
);
265+
}
266+
267+
// Copies the 64-byte IDENTITY table into *m using two 32-byte ymm loads and
// two 32-byte ymm stores.
// The loads use vmovaps, which requires IDENTITY to be 32-byte aligned; the
// stores use vmovups and place no alignment requirement on m.
// IDENTITY is passed with the "o" (offsettable memory) constraint so that the
// second half can be addressed as "0x20 + %[id]".
// m = matrix to initialize
void init_matrix3d_identity(matrix3d_t *m)
268+
{
269+
ARCH_X86_ASM
270+
(
271+
__ASM_EMIT("vmovaps 0x00 + %[id], %%ymm0")
272+
__ASM_EMIT("vmovaps 0x20 + %[id], %%ymm1")
273+
__ASM_EMIT("vmovups %%ymm0, 0x00(%[m])")
274+
__ASM_EMIT("vmovups %%ymm1, 0x20(%[m])")
275+
:
276+
: [m] "r" (m),
277+
[id] "o" (IDENTITY)
278+
: "memory",
279+
"%xmm0", "%xmm1"
280+
);
281+
}
282+
283+
// In-place transpose: loads the four 16-byte rows of *r into xmm0..xmm3,
// transposes them with MAT4_TRANSPOSE (xmm4 is the scratch register) and
// stores the result back through the same pointer.
// r = matrix to transpose; bound to the asm operand named "m"
void transpose_matrix3d1(matrix3d_t *r)
284+
{
285+
ARCH_X86_ASM
286+
(
287+
MATRIX_LOAD("m", "%xmm0", "%xmm1", "%xmm2", "%xmm3")
288+
MAT4_TRANSPOSE("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4")
289+
MATRIX_STORE("m", "%xmm0", "%xmm1", "%xmm2", "%xmm3")
290+
:
291+
: [m] "r" (r)
292+
: "memory",
293+
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
294+
"%xmm4"
295+
);
296+
}
297+
298+
// Out-of-place transpose: loads the four 16-byte rows of *m into xmm0..xmm3,
// transposes them with MAT4_TRANSPOSE (xmm4 is the scratch register) and
// stores the result to *r.
// All four rows are loaded before the first store is issued.
// r = destination matrix
// m = source matrix
void transpose_matrix3d2(matrix3d_t *r, const matrix3d_t *m)
299+
{
300+
ARCH_X86_ASM
301+
(
302+
MATRIX_LOAD("m", "%xmm0", "%xmm1", "%xmm2", "%xmm3")
303+
MAT4_TRANSPOSE("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4")
304+
MATRIX_STORE("r", "%xmm0", "%xmm1", "%xmm2", "%xmm3")
305+
:
306+
: [r] "r" (r), [m] "r" (m)
307+
: "memory",
308+
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
309+
"%xmm4"
310+
);
311+
}
312+
181313
} /* namespace avx */
182314
} /* namespace lsp */
183315

include/private/dsp/arch/x86/sse/3dmath.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -599,12 +599,11 @@ namespace lsp
599599
ARCH_X86_ASM
600600
(
601601
__ASM_EMIT("xorps %%xmm0, %%xmm0")
602-
__ASM_EMIT("xorps %%xmm1, %%xmm1")
603-
MATRIX_STORE("m", "%xmm0", "%xmm1", "%xmm0", "%xmm1")
602+
MATRIX_STORE("m", "%xmm0", "%xmm0", "%xmm0", "%xmm0")
604603
:
605604
: [m] "r" (m)
606605
: "memory",
607-
"%xmm0", "%xmm1"
606+
"%xmm0"
608607
);
609608
}
610609

@@ -613,12 +612,11 @@ namespace lsp
613612
ARCH_X86_ASM
614613
(
615614
__ASM_EMIT("movaps %[one], %%xmm0")
616-
__ASM_EMIT("movaps %%xmm0, %%xmm1")
617-
MATRIX_STORE("m", "%xmm0", "%xmm1", "%xmm0", "%xmm1")
615+
MATRIX_STORE("m", "%xmm0", "%xmm0", "%xmm0", "%xmm0")
618616
:
619617
: [m] "r" (m), [one] "m" (ONE)
620618
: "memory",
621-
"%xmm0", "%xmm1"
619+
"%xmm0"
622620
);
623621
}
624622

src/main/x86/avx.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,25 @@
435435
EXPORT1(normalize_vector);
436436
EXPORT1(normalize_vector2);
437437

438+
EXPORT1(init_matrix3d);
439+
EXPORT1(init_matrix3d_zero);
440+
EXPORT1(init_matrix3d_one);
441+
EXPORT1(init_matrix3d_identity);
442+
// EXPORT1(init_matrix3d_translate);
443+
// EXPORT1(init_matrix3d_scale);
444+
// EXPORT1(init_matrix3d_rotate_x);
445+
// EXPORT1(init_matrix3d_rotate_y);
446+
// EXPORT1(init_matrix3d_rotate_z);
447+
// EXPORT1(init_matrix3d_rotate_xyz);
448+
// EXPORT1(apply_matrix3d_mv2);
449+
// EXPORT1(apply_matrix3d_mv1);
450+
// EXPORT1(apply_matrix3d_mp2);
451+
// EXPORT1(apply_matrix3d_mp1);
452+
// EXPORT1(apply_matrix3d_mm2);
453+
// EXPORT1(apply_matrix3d_mm1);
454+
EXPORT1(transpose_matrix3d1);
455+
EXPORT1(transpose_matrix3d2);
456+
438457
// FMA3 support?
439458
if (f->features & CPU_OPTION_FMA3)
440459
{

src/test/utest/3d/matrix.cpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,16 @@ namespace lsp
7575
void apply_matrix3d_mm2(dsp::matrix3d_t *r, const dsp::matrix3d_t *s, const dsp::matrix3d_t *m);
7676
void apply_matrix3d_mm1(dsp::matrix3d_t *r, const dsp::matrix3d_t *m);
7777
}
78+
79+
namespace avx
80+
{
81+
void init_matrix3d(dsp::matrix3d_t *dst, const dsp::matrix3d_t *src);
82+
void init_matrix3d_zero(dsp::matrix3d_t *m);
83+
void init_matrix3d_one(dsp::matrix3d_t *m);
84+
void init_matrix3d_identity(dsp::matrix3d_t *m);
85+
void transpose_matrix3d1(dsp::matrix3d_t *r);
86+
void transpose_matrix3d2(dsp::matrix3d_t *r, const dsp::matrix3d_t *m);
87+
}
7888
)
7989

8090
typedef void (* init_matrix3d_t)(dsp::matrix3d_t *dst, const dsp::matrix3d_t *src);
@@ -441,8 +451,21 @@ UTEST_BEGIN("dsp.3d", matrix)
441451
UTEST_MAIN
442452
{
443453
IF_ARCH_X86(init_data("sse init_matrix",
444-
sse::init_matrix3d, sse::init_matrix3d_zero, sse::init_matrix3d_one, sse::init_matrix3d_identity,
445-
sse::transpose_matrix3d1, sse::transpose_matrix3d2
454+
sse::init_matrix3d,
455+
sse::init_matrix3d_zero,
456+
sse::init_matrix3d_one,
457+
sse::init_matrix3d_identity,
458+
sse::transpose_matrix3d1,
459+
sse::transpose_matrix3d2
460+
));
461+
462+
IF_ARCH_X86(init_data("avx init_matrix",
463+
avx::init_matrix3d,
464+
avx::init_matrix3d_zero,
465+
avx::init_matrix3d_one,
466+
avx::init_matrix3d_identity,
467+
avx::transpose_matrix3d1,
468+
avx::transpose_matrix3d2
446469
));
447470

448471
IF_ARCH_X86(transform("sse init_matrix_transform",

0 commit comments

Comments
 (0)