Skip to content
93 changes: 85 additions & 8 deletions src/ir/module-splitting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
#include "ir/module-utils.h"
#include "ir/names.h"
#include "pass.h"
#include "support/insert_ordered.h"
#include "wasm-builder.h"
#include "wasm.h"

Expand Down Expand Up @@ -295,6 +296,7 @@ struct ModuleSplitter {
void moveSecondaryFunctions();
void thunkExportedSecondaryFunctions();
void indirectCallsToSecondaryFunctions();
void indirectReferencesToSecondaryFunctions();
void exportImportCalledPrimaryFunctions();
void setupTablePatching();
void shareImportableItems();
Expand All @@ -311,6 +313,7 @@ struct ModuleSplitter {
}
moveSecondaryFunctions();
thunkExportedSecondaryFunctions();
indirectReferencesToSecondaryFunctions();
indirectCallsToSecondaryFunctions();
exportImportCalledPrimaryFunctions();
setupTablePatching();
Expand Down Expand Up @@ -529,10 +532,86 @@ Expression* ModuleSplitter::maybeLoadSecondary(Builder& builder,
return builder.makeSequence(loadSecondary, callIndirect);
}

void ModuleSplitter::indirectReferencesToSecondaryFunctions() {
// Turn references to secondary functions into references to thunks that
// perform a direct call to the original referent. The direct calls in the
// thunks will be handled like all other cross-module calls later, in
// |indirectCallsToSecondaryFunctions|.
struct Gatherer : public PostWalker<Gatherer> {
ModuleSplitter& parent;

Gatherer(ModuleSplitter& parent) : parent(parent) {}

// Collect RefFuncs in a map from the function name to all RefFuncs that
// refer to it. We only collect this for secondary funcs.
InsertOrderedMap<Name, std::vector<RefFunc*>> map;

void visitRefFunc(RefFunc* curr) {
if (parent.secondaryFuncs.count(curr->func)) {
map[curr->func].push_back(curr);
}
}
} gatherer(*this);
gatherer.walkModule(&primary);

// Find all RefFuncs in active elementSegments, which we can ignore: tables
// are the means by which we connect the modules, and are handled directly.
// Passive segments, however, are like RefFuncs in code, and we need to not
// ignore them here.
std::unordered_set<RefFunc*> ignore;
for (auto& seg : primary.elementSegments) {
if (!seg->table.is()) {
continue;
}
for (auto* curr : seg->data) {
if (auto* refFunc = curr->dynCast<RefFunc>()) {
ignore.insert(refFunc);
}
}
}

// Fix up what we found: Generate trampolines as described earlier, and apply
// them.
Builder builder(primary);
// Generate the new trampoline function and add it to the module.
for (auto& [name, refFuncs] : gatherer.map) {
// Find the relevant (non-ignored) RefFuncs. If there are none, we can skip
// creating a thunk entirely.
std::vector<RefFunc*> relevantRefFuncs;
for (auto* refFunc : refFuncs) {
assert(refFunc->func == name);
if (!ignore.count(refFunc)) {
relevantRefFuncs.push_back(refFunc);
}
}
if (relevantRefFuncs.empty()) {
continue;
}

auto* oldFunc = secondary.getFunction(name);
auto newName = Names::getValidFunctionName(
primary, std::string("trampoline_") + name.toString());

// Generate the call and the function.
std::vector<Expression*> args;
for (Index i = 0; i < oldFunc->getNumParams(); i++) {
args.push_back(builder.makeLocalGet(i, oldFunc->getLocalType(i)));
}
auto* call = builder.makeCall(name, args, oldFunc->getResults());

primary.addFunction(builder.makeFunction(newName, oldFunc->type, {}, call));

// Update RefFuncs to refer to it.
for (auto* refFunc : relevantRefFuncs) {
refFunc->func = newName;
}
}
}

void ModuleSplitter::indirectCallsToSecondaryFunctions() {
// Update direct calls of secondary functions to be indirect calls of their
// corresponding table indices instead.
struct CallIndirector : public WalkerPass<PostWalker<CallIndirector>> {
struct CallIndirector : public PostWalker<CallIndirector> {
ModuleSplitter& parent;
Builder builder;
CallIndirector(ModuleSplitter& parent)
Expand All @@ -554,16 +633,12 @@ void ModuleSplitter::indirectCallsToSecondaryFunctions() {
func->type,
curr->isReturn)));
}
void visitRefFunc(RefFunc* curr) {
assert(false && "TODO: handle ref.func as well");
}
};
PassRunner runner(&primary);
CallIndirector(*this).run(&runner, &primary);
CallIndirector(*this).walkModule(&primary);
}

void ModuleSplitter::exportImportCalledPrimaryFunctions() {
// Find primary functions called in the secondary module.
// Find primary functions called or referenced in the secondary module.
ModuleUtils::ParallelFunctionAnalysis<std::vector<Name>> callCollector(
secondary, [&](Function* func, std::vector<Name>& calledPrimaryFuncs) {
struct CallCollector : PostWalker<CallCollector> {
Expand All @@ -579,7 +654,9 @@ void ModuleSplitter::exportImportCalledPrimaryFunctions() {
}
}
void visitRefFunc(RefFunc* curr) {
assert(false && "TODO: handle ref.func as well");
if (primaryFuncs.count(curr->func)) {
calledPrimaryFuncs.push_back(curr->func);
}
}
};
CallCollector(primaryFuncs, calledPrimaryFuncs).walkFunction(func);
Expand Down
54 changes: 54 additions & 0 deletions test/lit/wasm-split/passive.wast
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.

;; RUN: wasm-split %s --split-funcs=second-in-table -g -o1 %t.1.wasm -o2 %t.2.wasm -all
;; RUN: wasm-dis %t.1.wasm | filecheck %s --check-prefix PRIMARY
;; RUN: wasm-dis %t.2.wasm | filecheck %s --check-prefix SECONDARY

(module
(type $func-array (array (mut funcref)))

;; PRIMARY: (type $0 (func))

;; PRIMARY: (import "placeholder" "0" (func $placeholder_0))

;; PRIMARY: (table $table 3 funcref)
(table $table 3 funcref)

;; Workaround for https://github.com/WebAssembly/binaryen/issues/6572 - we
;; error without an active segment.
(elem (i32.const 0))

;; PRIMARY: (elem $0 (i32.const 0) $placeholder_0)

;; PRIMARY: (elem $passive func $in-table $1)
(elem $passive func $in-table $second-in-table)

;; PRIMARY: (export "table" (table $table))

;; PRIMARY: (func $in-table
;; PRIMARY-NEXT: (nop)
;; PRIMARY-NEXT: )
(func $in-table
;; This is in a passive segment, but it is in the main module so we need no
;; special handling.
)

;; SECONDARY: (type $0 (func))

;; SECONDARY: (import "primary" "table" (table $table 3 funcref))

;; SECONDARY: (elem $0 (i32.const 0) $second-in-table)

;; SECONDARY: (func $second-in-table
;; SECONDARY-NEXT: (nop)
;; SECONDARY-NEXT: )
(func $second-in-table
;; This is in a passive segment, and it is in the secondary module, so we will
;; handle it by adding a trampoline from the segment as a new function "$1".
)
)
;; PRIMARY: (func $1
;; PRIMARY-NEXT: (call_indirect (type $0)
;; PRIMARY-NEXT: (i32.const 0)
;; PRIMARY-NEXT: )
;; PRIMARY-NEXT: )
106 changes: 106 additions & 0 deletions test/lit/wasm-split/ref.func.wast
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.

;; RUN: wasm-split %s --split-funcs=second,second-in-table -g -o1 %t.1.wasm -o2 %t.2.wasm -all
;; RUN: wasm-dis %t.1.wasm | filecheck %s --check-prefix PRIMARY
;; RUN: wasm-dis %t.2.wasm | filecheck %s --check-prefix SECONDARY

;; Test that we handle ref.func operations properly as we split out $second.
;; ref.funcs that refer to the other module must be fixed up to refer to
;; something in the same module, that then trampolines to the other.
(module
;; PRIMARY: (type $0 (func))

;; PRIMARY: (import "placeholder" "1" (func $placeholder_1))

;; PRIMARY: (import "placeholder" "2" (func $placeholder_2))

;; PRIMARY: (global $glob1 (ref func) (ref.func $prime))

;; PRIMARY: (global $glob2 (ref func) (ref.func $2))

;; PRIMARY: (table $table 3 3 funcref)
(table $table 1 1 funcref)

(global $glob1 (ref func) (ref.func $prime))

(global $glob2 (ref func) (ref.func $second))

;; PRIMARY: (elem $elem (i32.const 0) $in-table $placeholder_1 $placeholder_2)
(elem $elem (i32.const 0) $in-table $second-in-table)

;; PRIMARY: (export "prime" (func $prime))

;; PRIMARY: (export "table" (table $table))

;; PRIMARY: (export "global" (global $glob1))

;; PRIMARY: (export "global_3" (global $glob2))

;; PRIMARY: (func $prime
;; PRIMARY-NEXT: (drop
;; PRIMARY-NEXT: (ref.func $prime)
;; PRIMARY-NEXT: )
;; PRIMARY-NEXT: (drop
;; PRIMARY-NEXT: (ref.func $2)
;; PRIMARY-NEXT: )
;; PRIMARY-NEXT: )
(func $prime
(drop
(ref.func $prime)
)
(drop
(ref.func $second)
)
)

;; SECONDARY: (type $0 (func))

;; SECONDARY: (import "primary" "table" (table $table 3 3 funcref))

;; SECONDARY: (import "primary" "global" (global $glob1 (ref func)))

;; SECONDARY: (import "primary" "global_3" (global $glob2 (ref func)))

;; SECONDARY: (import "primary" "prime" (func $prime))

;; SECONDARY: (elem $0 (i32.const 1) $second-in-table $second)

;; SECONDARY: (func $second
;; SECONDARY-NEXT: (drop
;; SECONDARY-NEXT: (ref.func $prime)
;; SECONDARY-NEXT: )
;; SECONDARY-NEXT: (drop
;; SECONDARY-NEXT: (ref.func $second)
;; SECONDARY-NEXT: )
;; SECONDARY-NEXT: )
(func $second
(drop
(ref.func $prime)
)
(drop
(ref.func $second)
)
)

;; PRIMARY: (func $in-table
;; PRIMARY-NEXT: (nop)
;; PRIMARY-NEXT: )
(func $in-table
;; This empty function is in the table. Just being present in the table is not
;; enough of a reason for us to make a trampoline, even though in our IR the
;; table is a list of ref.funcs.
Comment on lines +89 to +91
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does the code differentiate RefFuncs in a table from other RefFuncs?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, this was a part I meant to write Friday evening and somehow forgot 😄 Added now + testing.

)

;; SECONDARY: (func $second-in-table
;; SECONDARY-NEXT: (nop)
;; SECONDARY-NEXT: )
(func $second-in-table
;; As above, but in the secondary module. We still don't need a trampoline
;; (but we will get a placeholder, as all split-out functions do).
)
)
;; PRIMARY: (func $2
;; PRIMARY-NEXT: (call_indirect (type $0)
;; PRIMARY-NEXT: (i32.const 2)
;; PRIMARY-NEXT: )
;; PRIMARY-NEXT: )