Skip to content

Commit 27f618c

Browse files
Jokeren authored and soumith committed
Add THVector Fill AVX
1 parent a14482a commit 27f618c

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

vector/AVX.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,22 @@
44
#include <intrin.h>
55
#endif
66

7+
/*
 * Fill the first n elements of x with the constant c using AVX stores.
 *
 * x: destination buffer; written with unaligned 256-bit stores, so no
 *    32-byte alignment is needed.
 * c: value written to every element.
 * n: number of elements to write; nothing is written when n <= 0.
 */
static void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n) {
  ptrdiff_t i;
  /* Broadcast c into all four 64-bit lanes of the register. */
  const __m256d YMM0 = _mm256_set1_pd(c);

  /* Main loop: 16 doubles per iteration, issued as four 256-bit stores. */
  for (i = 0; i <= n - 16; i += 16) {
    _mm256_storeu_pd(x + i, YMM0);
    _mm256_storeu_pd(x + i + 4, YMM0);
    _mm256_storeu_pd(x + i + 8, YMM0);
    _mm256_storeu_pd(x + i + 12, YMM0);
  }

  /* Scalar tail: i is already the index of the first element the vector
   * loop did not cover, so no separate offset computation is needed. */
  for (; i < n; i++) {
    x[i] = c;
  }
}
22+
723
static void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
824
ptrdiff_t i;
925
__m256d YMM0, YMM1, YMM2, YMM3;
@@ -107,6 +123,21 @@ static void THDoubleVector_add_AVX(double *y, const double *x, const double c, c
107123
}
108124
}
109125

126+
/*
 * Fill the first n elements of x with the constant c using AVX stores.
 *
 * x: destination buffer; written with unaligned 256-bit stores, so no
 *    32-byte alignment is needed.
 * c: value written to every element.
 * n: number of elements to write; nothing is written when n <= 0.
 */
static void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n) {
  ptrdiff_t i;
  /* Broadcast c into all eight 32-bit lanes of the register. */
  const __m256 YMM0 = _mm256_set1_ps(c);

  /* Main loop: 32 floats per iteration, issued as four 256-bit stores. */
  for (i = 0; i <= n - 32; i += 32) {
    _mm256_storeu_ps(x + i, YMM0);
    _mm256_storeu_ps(x + i + 8, YMM0);
    _mm256_storeu_ps(x + i + 16, YMM0);
    _mm256_storeu_ps(x + i + 24, YMM0);
  }

  /* Scalar tail: i is already the index of the first element the vector
   * loop did not cover, so no separate offset computation is needed. */
  for (; i < n; i++) {
    x[i] = c;
  }
}
110141

111142
static void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
112143
ptrdiff_t i;

0 commit comments

Comments
 (0)