Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions std/assembly/internal/array.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ export function weakHeapSort<T>(arr: Array<T>, comparator: (a: T, b: T) => i32):
const typeShift = alignof<T>();
const intShift = alignof<i32>();

var blen = (len + 7) >> 3;
var blen = (len + 31) >> 5;
Copy link
Member

@dcodeIO dcodeIO Apr 7, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying to understand this change. Before, there was one byte per 8 elements (1 bit per element), if I understood this correctly:

8 elements -> blen=1
9 elements -> blen=2

Afterwards, with the change, this becomes:

8 elements -> blen=1
9 elements -> blen=1 (is this correct?)

Copy link
Member Author

@MaxGraey MaxGraey Apr 7, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right!

Before:
8 elements -> blen = 1
9 elements -> blen = 2

After:
8 elements -> blen = 1
9 elements -> blen = 1
32 elements -> blen = 1
33 elements -> blen = 2
64 elements -> blen = 2

So for 32 elements now used full space of 32-bit word

Copy link
Member

@dcodeIO dcodeIO Apr 7, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might be missing something, but my impression was that the bitset needs one bit per element? Or does it calculate the number of 32-bit ints now and blen isn't synonymous for byteLength anymore?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, bitset just add one extra bit per element.

Copy link
Member

@dcodeIO dcodeIO Apr 7, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, if there are 3 elements, wouldn't it need 3 distinct bits still (one per element)? Otherwise, let's say there are three elements, the bitset would be 0011b, unable to distinguish this from element 1 and 2 being set?

Or, let me rephrase: If there are 32 elements, the bitset would need 32 bits. With 8 bits per byte, that are 4 bytes, not 1?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because I hope Uint32Array is more performant. In JS version it's true

Copy link
Member

@dcodeIO dcodeIO Apr 7, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Than an Uint8Array? Even though an Uint32Array involves additional (obviously hard to understand ^^) shifts?

Copy link
Member Author

@MaxGraey MaxGraey Apr 7, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you mean Array<bool> or Array<u8> which store only one bit of 8-bits current version slower of course but we loose 7-bits and our memory usage is worst. So this is balance between memory usage/speed. I mean current version faster previous when I used 8-bit vector inside 32-bit array =)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure Array<bool> doesn't make sense because it wastes 7/8 bits, but why wouldn't u8 work, storing 8 element bits per u8, over an u32 storing 32 element bits per u32?

Copy link
Member Author

@MaxGraey MaxGraey Apr 7, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. That's what I wrote and older version works perfectly, but we loose 24-bits and use the same xor-shifts techniques for access to bits. Now I rewrite this to use full 32-bits range

var bitset = allocate_memory(blen << intShift);

set_memory(bitset, 0, blen << intShift);

for (i = len - 1; i > 0; i--) {
j = i;
while ((j & 1) == ((load<i32>(bitset + ((j >> 4) << intShift)) >> ((j >> 1) & 7)) & 1)) {
while ((j & 1) == ((load<i32>(bitset + ((j >> 6) << intShift)) >> ((j >> 1) & 31)) & 1)) {
j >>= 1;
}

Expand All @@ -54,8 +54,8 @@ export function weakHeapSort<T>(arr: Array<T>, comparator: (a: T, b: T) => i32):

if (comparator(a, b) < 0) {
store<i32>(
bitset + ((i >> 3) << intShift),
load<i32>(bitset + ((i >> 3) << intShift)) ^ (1 << (i & 7))
bitset + ((i >> 5) << intShift),
load<i32>(bitset + ((i >> 5) << intShift)) ^ (1 << (i & 31))
);
store<T>(arr.__memory + (i << typeShift), a); // arr[i] = a;
store<T>(arr.__memory + (p << typeShift), b); // arr[p] = b;
Expand All @@ -73,7 +73,7 @@ export function weakHeapSort<T>(arr: Array<T>, comparator: (a: T, b: T) => i32):
store<T>(arr.__memory + (i << typeShift), a);

let x = 1;
while ((y = (x << 1) + ((load<i32>(bitset + ((x >> 3) << intShift)) >> (x & 7)) & 1)) < i) {
while ((y = (x << 1) + ((load<i32>(bitset + ((x >> 5) << intShift)) >> (x & 31)) & 1)) < i) {
x = y;
}

Expand All @@ -83,8 +83,8 @@ export function weakHeapSort<T>(arr: Array<T>, comparator: (a: T, b: T) => i32):

if (comparator(a, b) < 0) {
store<i32>(
bitset + ((x >> 3) << intShift),
load<i32>(bitset + ((x >> 3) << intShift)) ^ (1 << (x & 7))
bitset + ((x >> 5) << intShift),
load<i32>(bitset + ((x >> 5) << intShift)) ^ (1 << (x & 31))
);

store<T>(arr.__memory + (x << typeShift), a); // arr[x] = a;
Expand Down
24 changes: 12 additions & 12 deletions tests/compiler/std/array.optimized.wat
Original file line number Diff line number Diff line change
Expand Up @@ -4030,9 +4030,9 @@
(get_local $0)
)
)
(i32.const 7)
(i32.const 31)
)
(i32.const 3)
(i32.const 5)
)
)
(i32.const 2)
Expand Down Expand Up @@ -4076,7 +4076,7 @@
(i32.shl
(i32.shr_s
(get_local $2)
(i32.const 4)
(i32.const 6)
)
(i32.const 2)
)
Expand All @@ -4087,7 +4087,7 @@
(get_local $2)
(i32.const 1)
)
(i32.const 7)
(i32.const 31)
)
)
(i32.const 1)
Expand Down Expand Up @@ -4156,7 +4156,7 @@
(i32.shl
(i32.shr_s
(get_local $3)
(i32.const 3)
(i32.const 5)
)
(i32.const 2)
)
Expand All @@ -4168,7 +4168,7 @@
(i32.shl
(i32.shr_s
(get_local $3)
(i32.const 3)
(i32.const 5)
)
(i32.const 2)
)
Expand All @@ -4178,7 +4178,7 @@
(i32.const 1)
(i32.and
(get_local $3)
(i32.const 7)
(i32.const 31)
)
)
)
Expand Down Expand Up @@ -4287,15 +4287,15 @@
(i32.shl
(i32.shr_s
(get_local $2)
(i32.const 3)
(i32.const 5)
)
(i32.const 2)
)
)
)
(i32.and
(get_local $2)
(i32.const 7)
(i32.const 31)
)
)
(i32.const 1)
Expand Down Expand Up @@ -4360,7 +4360,7 @@
(i32.shl
(i32.shr_s
(get_local $2)
(i32.const 3)
(i32.const 5)
)
(i32.const 2)
)
Expand All @@ -4372,7 +4372,7 @@
(i32.shl
(i32.shr_s
(get_local $2)
(i32.const 3)
(i32.const 5)
)
(i32.const 2)
)
Expand All @@ -4382,7 +4382,7 @@
(i32.const 1)
(i32.and
(get_local $2)
(i32.const 7)
(i32.const 31)
)
)
)
Expand Down
24 changes: 12 additions & 12 deletions tests/compiler/std/array.untouched.wat
Original file line number Diff line number Diff line change
Expand Up @@ -4636,9 +4636,9 @@
(i32.shr_s
(i32.add
(get_local $2)
(i32.const 7)
(i32.const 31)
)
(i32.const 3)
(i32.const 5)
)
)
(set_local $10
Expand Down Expand Up @@ -4691,7 +4691,7 @@
(i32.shl
(i32.shr_s
(get_local $4)
(i32.const 4)
(i32.const 6)
)
(i32.const 2)
)
Expand All @@ -4702,7 +4702,7 @@
(get_local $4)
(i32.const 1)
)
(i32.const 7)
(i32.const 31)
)
)
(i32.const 1)
Expand Down Expand Up @@ -4775,7 +4775,7 @@
(i32.shl
(i32.shr_s
(get_local $3)
(i32.const 3)
(i32.const 5)
)
(i32.const 2)
)
Expand All @@ -4787,7 +4787,7 @@
(i32.shl
(i32.shr_s
(get_local $3)
(i32.const 3)
(i32.const 5)
)
(i32.const 2)
)
Expand All @@ -4797,7 +4797,7 @@
(i32.const 1)
(i32.and
(get_local $3)
(i32.const 7)
(i32.const 31)
)
)
)
Expand Down Expand Up @@ -4911,15 +4911,15 @@
(i32.shl
(i32.shr_s
(get_local $11)
(i32.const 3)
(i32.const 5)
)
(i32.const 2)
)
)
)
(i32.and
(get_local $11)
(i32.const 7)
(i32.const 31)
)
)
(i32.const 1)
Expand Down Expand Up @@ -4989,7 +4989,7 @@
(i32.shl
(i32.shr_s
(get_local $11)
(i32.const 3)
(i32.const 5)
)
(i32.const 2)
)
Expand All @@ -5001,7 +5001,7 @@
(i32.shl
(i32.shr_s
(get_local $11)
(i32.const 3)
(i32.const 5)
)
(i32.const 2)
)
Expand All @@ -5011,7 +5011,7 @@
(i32.const 1)
(i32.and
(get_local $11)
(i32.const 7)
(i32.const 31)
)
)
)
Expand Down