Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 23 additions & 25 deletions std/assembly/util/memory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -201,17 +201,16 @@ export function memset(dest: usize, c: u8, n: usize): void { // see: musl/src/st
} else {
// fill head and tail with minimal branching
if (!n) return;
let dend = dest + n - 4;
store<u8>(dest, c);
store<u8>(dend, c, 3);
store<u8>(dest + n - 1, c);
if (n <= 2) return;
store<u8>(dest, c, 1);
store<u8>(dest, c, 2);
store<u8>(dend, c, 2);
store<u8>(dend, c, 1);
store<u8>(dest + 1, c);
store<u8>(dest + 2, c);
store<u8>(dest + n - 2, c);
store<u8>(dest + n - 3, c);
if (n <= 6) return;
store<u8>(dest, c, 3);
store<u8>(dend, c);
store<u8>(dest + 3, c);
store<u8>(dest + n - 4, c);
if (n <= 8) return;

// advance pointer to align it at 4-byte boundary
Expand All @@ -223,23 +222,22 @@ export function memset(dest: usize, c: u8, n: usize): void { // see: musl/src/st
let c32: u32 = <u32>-1 / 255 * c;

// fill head/tail up to 28 bytes each in preparation
dend = dest + n - 28;
store<u32>(dest, c32);
store<u32>(dend, c32, 24);
store<u32>(dest + n - 4, c32);
if (n <= 8) return;
store<u32>(dest, c32, 4);
store<u32>(dest, c32, 8);
store<u32>(dend, c32, 16);
store<u32>(dend, c32, 20);
store<u32>(dest + 4, c32);
store<u32>(dest + 8, c32);
store<u32>(dest + n - 12, c32);
store<u32>(dest + n - 8, c32);
if (n <= 24) return;
store<u32>(dest, c32, 12);
store<u32>(dest, c32, 16);
store<u32>(dest, c32, 20);
store<u32>(dest, c32, 24);
store<u32>(dend, c32);
store<u32>(dend, c32, 4);
store<u32>(dend, c32, 8);
store<u32>(dend, c32, 12);
store<u32>(dest + 12, c32);
store<u32>(dest + 16, c32);
store<u32>(dest + 20, c32);
store<u32>(dest + 24, c32);
store<u32>(dest + n - 28, c32);
store<u32>(dest + n - 24, c32);
store<u32>(dest + n - 20, c32);
store<u32>(dest + n - 16, c32);

// align to a multiple of 8
k = 24 + (dest & 4);
Expand All @@ -250,9 +248,9 @@ export function memset(dest: usize, c: u8, n: usize): void { // see: musl/src/st
let c64: u64 = <u64>c32 | (<u64>c32 << 32);
while (n >= 32) {
store<u64>(dest, c64);
store<u64>(dest, c64, 8);
store<u64>(dest, c64, 16);
store<u64>(dest, c64, 24);
store<u64>(dest + 8, c64);
store<u64>(dest + 16, c64);
store<u64>(dest + 24, c64);
n -= 32;
dest += 32;
}
Expand Down
75 changes: 51 additions & 24 deletions tests/compiler/call-super.optimized.wat
Original file line number Diff line number Diff line change
Expand Up @@ -1449,11 +1449,10 @@
local.get $0
local.get $1
i32.add
i32.const 4
i32.const 1
i32.sub
local.tee $2
i32.const 0
i32.store8 offset=3
i32.store8
local.get $1
i32.const 2
i32.le_u
Expand All @@ -1464,20 +1463,31 @@
local.get $0
i32.const 0
i32.store8 offset=2
local.get $2
local.get $0
local.get $1
i32.add
local.tee $2
i32.const 2
i32.sub
i32.const 0
i32.store8 offset=2
i32.store8
local.get $2
i32.const 3
i32.sub
i32.const 0
i32.store8 offset=1
i32.store8
local.get $1
i32.const 6
i32.le_u
br_if $~lib/util/memory/memset|inlined.0
local.get $0
i32.const 0
i32.store8 offset=3
local.get $2
local.get $0
local.get $1
i32.add
i32.const 4
i32.sub
i32.const 0
i32.store8
local.get $1
Expand All @@ -1501,14 +1511,13 @@
i32.sub
i32.const -4
i32.and
local.tee $2
local.tee $1
i32.add
i32.const 28
i32.const 4
i32.sub
local.tee $1
i32.const 0
i32.store offset=24
local.get $2
i32.store
local.get $1
i32.const 8
i32.le_u
br_if $~lib/util/memory/memset|inlined.0
Expand All @@ -1518,13 +1527,20 @@
local.get $0
i32.const 0
i32.store offset=8
local.get $0
local.get $1
i32.add
local.tee $2
i32.const 12
i32.sub
i32.const 0
i32.store offset=16
local.get $1
i32.const 0
i32.store offset=20
i32.store
local.get $2
i32.const 8
i32.sub
i32.const 0
i32.store
local.get $1
i32.const 24
i32.le_u
br_if $~lib/util/memory/memset|inlined.0
Expand All @@ -1540,29 +1556,40 @@
local.get $0
i32.const 0
i32.store offset=24
local.get $0
local.get $1
i32.add
local.tee $2
i32.const 28
i32.sub
i32.const 0
i32.store
local.get $1
local.get $2
i32.const 24
i32.sub
i32.const 0
i32.store offset=4
local.get $1
i32.store
local.get $2
i32.const 20
i32.sub
i32.const 0
i32.store offset=8
local.get $1
i32.store
local.get $2
i32.const 16
i32.sub
i32.const 0
i32.store offset=12
i32.store
local.get $0
local.get $0
i32.const 4
i32.and
i32.const 24
i32.add
local.tee $1
local.tee $2
i32.add
local.set $0
local.get $2
local.get $1
local.get $2
i32.sub
local.set $1
loop $while-continue|0
Expand Down
Loading