|
41 | 41 | __ASM_EMIT("vdivps %" x1 ", %" x0 ", %" x0) /* x0 = x/w y/w z/w w/w */ \
|
42 | 42 | __ASM_EMIT("1000000:")
|
43 | 43 |
|
// Load a 4x4 row-major float matrix (16 consecutive floats) into four SIMD registers.
// Uses unaligned loads (vmovups), so the matrix pointer needs no particular alignment.
//   ptr     = name of the inline-asm memory operand holding the matrix address
//   x0..x3  = register names (e.g. "%xmm0") receiving rows 0..3
#define MATRIX_LOAD(ptr, x0, x1, x2, x3) \
    __ASM_EMIT("vmovups 0x00(%[" ptr "]), %" x0 ) /* row 0 */ \
    __ASM_EMIT("vmovups 0x10(%[" ptr "]), %" x1 ) /* row 1 */ \
    __ASM_EMIT("vmovups 0x20(%[" ptr "]), %" x2 ) /* row 2 */ \
    __ASM_EMIT("vmovups 0x30(%[" ptr "]), %" x3 ) /* row 3 */
| 55 | + |
// Store four SIMD registers as a 4x4 row-major float matrix (16 consecutive floats).
// Uses unaligned stores (vmovups), so the matrix pointer needs no particular alignment.
//   ptr     = name of the inline-asm memory operand holding the matrix address
//   x0..x3  = register names (e.g. "%xmm0") holding rows 0..3
#define MATRIX_STORE(ptr, x0, x1, x2, x3) \
    __ASM_EMIT("vmovups %" x0 ", 0x00(%[" ptr "])") /* row 0 */ \
    __ASM_EMIT("vmovups %" x1 ", 0x10(%[" ptr "])") /* row 1 */ \
    __ASM_EMIT("vmovups %" x2 ", 0x20(%[" ptr "])") /* row 2 */ \
    __ASM_EMIT("vmovups %" x3 ", 0x30(%[" ptr "])") /* row 3 */
| 67 | + |
// In-register transpose of a 4x4 float matrix held in x0..x3 (one row per register).
// Implemented with integer unpack instructions (vpunpck*dq/qdq) — these only move
// 32/64-bit lanes bitwise, so they are safe on float data.
// Lane notation in the comments: rows a,b,c,d with elements 1..4.
//   x0..x3 = rows 0..3 on input, columns 0..3 on output
//   x4     = scratch register (clobbered)
#define MAT4_TRANSPOSE(x0, x1, x2, x3, x4) \
    __ASM_EMIT("vpunpckhdq %" x3 ", %" x2 ", %" x4) /* x4 = c3 d3 c4 d4 */ \
    __ASM_EMIT("vpunpckldq %" x3 ", %" x2 ", %" x2) /* x2 = c1 d1 c2 d2 */ \
    __ASM_EMIT("vpunpckhdq %" x1 ", %" x0 ", %" x3) /* x3 = a3 b3 a4 b4 */ \
    __ASM_EMIT("vpunpckldq %" x1 ", %" x0 ", %" x0) /* x0 = a1 b1 a2 b2 */ \
    __ASM_EMIT("vpunpckhqdq %" x2 ", %" x0 ", %" x1) /* x1 = a2 b2 c2 d2 */ \
    __ASM_EMIT("vpunpcklqdq %" x2 ", %" x0 ", %" x0) /* x0 = a1 b1 c1 d1 */ \
    __ASM_EMIT("vpunpcklqdq %" x4 ", %" x3 ", %" x2) /* x2 = a3 b3 c3 d3 */ \
    __ASM_EMIT("vpunpckhqdq %" x4 ", %" x3 ", %" x3) /* x3 = a4 b4 c4 d4 */
| 83 | + |
| 84 | + |
44 | 85 | namespace lsp
|
45 | 86 | {
|
46 | 87 | namespace avx
|
47 | 88 | {
|
48 | 89 | using namespace dsp;
|
49 | 90 |
|
        IF_ARCH_X86(
            // 4x4 identity matrix. 32-byte aligned so it can be read as two
            // full YMM rows with aligned loads (vmovaps).
            static const float IDENTITY[16] __lsp_aligned32 =
            {
                1.0f, 0.0f, 0.0f, 0.0f,
                0.0f, 1.0f, 0.0f, 0.0f,
                0.0f, 0.0f, 1.0f, 0.0f,
                0.0f, 0.0f, 0.0f, 1.0f
            };

            // Eight packed 1.0f values — one full 32-byte YMM vector.
            static const float ONE[] __lsp_aligned32 = { LSP_DSP_VEC8(1.0f) };

            // Lane mask keeping the low three 32-bit components (x, y, z)
            // and zeroing the fourth (w).
            static const uint32_t X_MASK0111[] __lsp_aligned32 = { 0xffffffff, 0xffffffff, 0xffffffff, 0 };
        )
|
61 | 103 |
|
62 | 104 | void init_point_xyz(point3d_t *p, float x, float y, float z)
|
@@ -178,6 +220,96 @@ namespace lsp
|
178 | 220 | );
|
179 | 221 | }
|
180 | 222 |
|
        // Copy a 4x4 float matrix: *dst = *src.
        // Two 32-byte unaligned YMM loads/stores move all 16 floats.
        // NOTE: listing "%xmm0"/"%xmm1" as clobbers also covers %ymm0/%ymm1 —
        // they are the same architectural registers.
        void init_matrix3d(matrix3d_t *dst, const matrix3d_t *src)
        {
            ARCH_X86_ASM
            (
                __ASM_EMIT("vmovups 0x00(%[s]), %%ymm0")    /* rows 0-1 */
                __ASM_EMIT("vmovups 0x20(%[s]), %%ymm1")    /* rows 2-3 */
                __ASM_EMIT("vmovups %%ymm0, 0x00(%[d])")
                __ASM_EMIT("vmovups %%ymm1, 0x20(%[d])")
                :
                : [s] "r" (src), [d] "r" (dst)
                : "memory",
                  "%xmm0", "%xmm1"
            );
        }
| 237 | + |
| 238 | + void init_matrix3d_zero(matrix3d_t *m) |
| 239 | + { |
| 240 | + ARCH_X86_ASM |
| 241 | + ( |
| 242 | + __ASM_EMIT("vxorps %%ymm0, %%ymm0, %%ymm0") |
| 243 | + __ASM_EMIT("vmovups %%ymm0, 0x00(%[m])") |
| 244 | + __ASM_EMIT("vmovups %%ymm1, 0x20(%[m])") |
| 245 | + : |
| 246 | + : [m] "r" (m) |
| 247 | + : "memory", |
| 248 | + "%xmm0" |
| 249 | + ); |
| 250 | + } |
| 251 | + |
        // Fill a 4x4 float matrix with 1.0f in every element.
        // Loads eight packed ones from the 32-byte-aligned ONE constant
        // (aligned load is safe: ONE is declared __lsp_aligned32) and stores
        // them twice, covering all 16 floats.
        void init_matrix3d_one(matrix3d_t *m)
        {
            ARCH_X86_ASM
            (
                __ASM_EMIT("vmovaps %[one], %%ymm0")        /* ymm0 = 1 x8 */
                __ASM_EMIT("vmovups %%ymm0, 0x00(%[m])")    /* rows 0-1 */
                __ASM_EMIT("vmovups %%ymm0, 0x20(%[m])")    /* rows 2-3 */
                :
                : [m] "r" (m),
                  [one] "m" (ONE)
                : "memory",
                  "%xmm0"
            );
        }
| 266 | + |
        // Initialize a 4x4 float matrix to the identity matrix.
        // The "o" (offsettable memory) constraint on IDENTITY allows the
        // "0x00 + %[id]" / "0x20 + %[id]" addressing used to read the two
        // 32-byte halves of the constant with aligned loads.
        void init_matrix3d_identity(matrix3d_t *m)
        {
            ARCH_X86_ASM
            (
                __ASM_EMIT("vmovaps 0x00 + %[id], %%ymm0")  /* identity rows 0-1 */
                __ASM_EMIT("vmovaps 0x20 + %[id], %%ymm1")  /* identity rows 2-3 */
                __ASM_EMIT("vmovups %%ymm0, 0x00(%[m])")
                __ASM_EMIT("vmovups %%ymm1, 0x20(%[m])")
                :
                : [m] "r" (m),
                  [id] "o" (IDENTITY)
                : "memory",
                  "%xmm0", "%xmm1"
            );
        }
| 282 | + |
        // Transpose a 4x4 float matrix in place: r = transpose(r).
        // Loads the four rows, transposes them in registers via MAT4_TRANSPOSE
        // (xmm4 is scratch), and stores the result back over the input.
        void transpose_matrix3d1(matrix3d_t *r)
        {
            ARCH_X86_ASM
            (
                MATRIX_LOAD("m", "%xmm0", "%xmm1", "%xmm2", "%xmm3")
                MAT4_TRANSPOSE("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4")
                MATRIX_STORE("m", "%xmm0", "%xmm1", "%xmm2", "%xmm3")
                :
                : [m] "r" (r)   /* single operand: source and destination */
                : "memory",
                  "%xmm0", "%xmm1", "%xmm2", "%xmm3",
                  "%xmm4"
            );
        }
| 297 | + |
        // Transpose a 4x4 float matrix into a separate destination: r = transpose(m).
        // Loads rows from operand [m], transposes in registers (xmm4 is scratch),
        // and stores the result to operand [r]. Source is not modified.
        // NOTE(review): behavior when r and m alias is the same as the in-place
        // variant, since the full matrix is loaded before any store.
        void transpose_matrix3d2(matrix3d_t *r, const matrix3d_t *m)
        {
            ARCH_X86_ASM
            (
                MATRIX_LOAD("m", "%xmm0", "%xmm1", "%xmm2", "%xmm3")
                MAT4_TRANSPOSE("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4")
                MATRIX_STORE("r", "%xmm0", "%xmm1", "%xmm2", "%xmm3")
                :
                : [r] "r" (r), [m] "r" (m)
                : "memory",
                  "%xmm0", "%xmm1", "%xmm2", "%xmm3",
                  "%xmm4"
            );
        }
| 312 | + |
181 | 313 | } /* namespace avx */
|
182 | 314 | } /* namespace lsp */
|
183 | 315 |
|
|
0 commit comments