Skip to content
20 changes: 12 additions & 8 deletions src/passes/StringLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
#include "ir/type-updating.h"
#include "ir/utils.h"
#include "pass.h"
#include "support/json.h"
#include "support/string.h"
#include "wasm-builder.h"
#include "wasm.h"

Expand Down Expand Up @@ -205,8 +205,9 @@ struct StringLowering : public StringGathering {

void makeImports(Module* module) {
Index importIndex = 0;
json::Value stringArray;
stringArray.setArray();
std::stringstream json;
json << '[';
bool first = true;
std::vector<Name> importedStrings;
for (auto& global : module->globals) {
if (global->init) {
Expand All @@ -216,16 +217,19 @@ struct StringLowering : public StringGathering {
importIndex++;
global->init = nullptr;

auto str = json::Value::make(std::string(c->string.str).c_str());
stringArray.push_back(str);
if (first) {
first = false;
} else {
json << ',';
}
String::printEscapedJSON(json, c->string.str);
}
}
}

// Add a custom section with the JSON.
std::stringstream stream;
stringArray.stringify(stream);
auto str = stream.str();
json << ']';
auto str = json.str();
auto vec = std::vector<char>(str.begin(), str.end());
module->customSections.emplace_back(
CustomSection{"string.consts", std::move(vec)});
Expand Down
4 changes: 2 additions & 2 deletions src/support/json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
*/

#include "support/json.h"
#include "support/string.h"

namespace json {

void Value::stringify(std::ostream& os, bool pretty) {
if (isString()) {
// TODO: escaping
os << '"' << getCString() << '"';
wasm::String::printEscapedJSON(os, getCString());
} else if (isArray()) {
os << '[';
auto first = true;
Expand Down
74 changes: 73 additions & 1 deletion src/support/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ std::string trim(const std::string& input) {
return input.substr(0, size);
}

std::ostream& printEscaped(std::ostream& os, std::string_view str) {
std::ostream& printEscaped(std::ostream& os, const std::string_view str) {
os << '"';
for (unsigned char c : str) {
switch (c) {
Expand Down Expand Up @@ -140,4 +140,76 @@ std::ostream& printEscaped(std::ostream& os, std::string_view str) {
return os << '"';
}

// Prints |str| to |os| as a double-quoted JSON string literal and returns |os|.
//
// - Tab, newline, carriage return, '"' and '\\' use their short JSON escapes.
// - Printable ASCII (32..126, which includes the apostrophe) is copied as-is.
// - Every other byte sequence is decoded leniently as UTF-8 (continuation bytes
//   are not validated, matching the emscripten decoder this is based on) and
//   written as "\uXXXX" escapes; code points of 0x10000 and above are written
//   as a UTF-16 surrogate pair.
// - If the input ends in the middle of a multi-byte sequence, a single
//   "\ufffd" (U+FFFD REPLACEMENT CHARACTER) is written for the truncated
//   sequence rather than reading past the end of |str|.
//
// The formatting flags of |os| are left untouched.
std::ostream& printEscapedJSON(std::ostream& os, const std::string_view str) {
  // Emits the low 16 bits of |v| as "\uXXXX" with exactly four lowercase hex
  // digits. The digits are produced one nibble at a time so that leading
  // zeros are never dropped and the stream's numeric base is not modified.
  auto uEscape = [&os](uint32_t v) {
    const char* hexDigits = "0123456789abcdef";
    os << "\\u" << hexDigits[(v >> 12) & 0xf] << hexDigits[(v >> 8) & 0xf]
       << hexDigits[(v >> 4) & 0xf] << hexDigits[v & 0xf];
  };

  // Returns the six payload bits of the byte at |pos|. Callers must have
  // checked that |pos| is in range.
  auto payload = [&str](size_t pos) {
    return uint32_t(static_cast<unsigned char>(str[pos]) & 63);
  };

  const size_t size = str.size();
  os << '"';
  for (size_t i = 0; i < size; i++) {
    // Go through unsigned char so bytes >= 0x80 never become negative on
    // platforms where plain char is signed.
    const uint32_t u0 = static_cast<unsigned char>(str[i]);
    switch (u0) {
      case '\t':
        os << "\\t";
        continue;
      case '\n':
        os << "\\n";
        continue;
      case '\r':
        os << "\\r";
        continue;
      case '"':
        os << "\\\"";
        continue;
      case '\\':
        os << "\\\\";
        continue;
      default:
        break;
    }

    // Single-byte (ASCII) case.
    if (!(u0 & 0x80)) {
      if (u0 >= 32 && u0 < 127) {
        os << char(u0);
      } else {
        uEscape(u0);
      }
      continue;
    }

    // Multi-byte case. Decoding is based off of
    // https://github.com/emscripten-core/emscripten/blob/59e6b8f1262d75585d8416b728e8cbb3db176fe2/src/library_strings.js#L72-L91
    // The lead byte determines how many continuation bytes follow.
    size_t extra = 3;
    if ((u0 & 0xE0) == 0xC0) {
      extra = 1;
    } else if ((u0 & 0xF0) == 0xE0) {
      extra = 2;
    }

    // Check that we have not run off the end of the string before reading the
    // continuation bytes; a truncated sequence becomes U+FFFD and ends the
    // output, since all remaining bytes belonged to that sequence.
    if (size - i - 1 < extra) {
      uEscape(0xFFFD);
      break;
    }

    uint32_t code;
    if (extra == 1) {
      code = ((u0 & 31) << 6) | payload(i + 1);
    } else if (extra == 2) {
      code = ((u0 & 15) << 12) | (payload(i + 1) << 6) | payload(i + 2);
    } else {
      code = ((u0 & 7) << 18) | (payload(i + 1) << 12) |
             (payload(i + 2) << 6) | payload(i + 3);
    }
    i += extra;

    if (code < 0x10000) {
      uEscape(code);
    } else {
      // Outside the Basic Multilingual Plane: JSON needs a surrogate pair.
      const uint32_t ch = code - 0x10000;
      uEscape(0xD800 | (ch >> 10));
      uEscape(0xDC00 | (ch & 0x3FF));
    }
  }
  return os << '"';
}

} // namespace wasm::String
4 changes: 3 additions & 1 deletion src/support/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ inline bool isNumber(const std::string& str) {
return !str.empty() && std::all_of(str.begin(), str.end(), ::isdigit);
}

std::ostream& printEscaped(std::ostream& os, std::string_view str);
std::ostream& printEscaped(std::ostream& os, const std::string_view str);

// Writes |str| to |os| surrounded by double quotes, for use as a JSON string,
// and returns |os|. Tab, newline, carriage return, double quote and backslash
// are written with backslash escapes, printable ASCII is copied through, and
// all other bytes (control characters and multi-byte sequences, which are
// decoded using the UTF-8 bit layout) are written as \u escapes, using a pair
// of escapes for code points of 0x10000 and above.
std::ostream& printEscapedJSON(std::ostream& os, const std::string_view str);

} // namespace wasm::String

Expand Down
8 changes: 8 additions & 0 deletions test/lit/passes/string-lowering.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Loads the wasm binary named by the first command-line argument, pulls out
// its 'string.consts' custom section, and prints the section both as raw text
// and as the value obtained by parsing that text as JSON (which throws if the
// section is not valid JSON).
const fs = require('fs');

const wasmPath = process.argv[2];
const wasmModule = new WebAssembly.Module(fs.readFileSync(wasmPath));
const [section] = WebAssembly.Module.customSections(wasmModule, 'string.consts');
const text = new TextDecoder('utf-8').decode(new Uint8Array(section));
console.log("string:", text);
console.log("JSON:", JSON.parse(text));
23 changes: 19 additions & 4 deletions test/lit/passes/string-lowering.wast
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
;; operations are tested in string-gathering.wast (which is auto-updated, unlike
;; this which is manual).

;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s

(module
(func $consts
(drop
Expand All @@ -15,9 +13,26 @@
(drop
(string.const "foo")
)
(drop
(string.const "needs\tescaping\00.")
)
)
)

;; The custom section should contain foo and bar, and foo only once.
;; CHECK: custom section "string.consts", size 13, contents: "[\"bar\",\"foo\"]"
;; The custom section should contain foo and bar, and foo only once, and the
;; string with \t should be escaped.
;;
;; RUN: wasm-opt %s --string-lowering -all -S -o - | filecheck %s
;;
;; CHECK: custom section "string.consts", size 38, contents: "[\"bar\",\"foo\",\"needs\\tescaping\\u0000.\"]"

;; The custom section should parse OK using JSON.parse from node.
;; (Note we run --remove-unused-module-elements to remove externref-using
;; imports, which require a newer version of node.)
;;
;; RUN: wasm-opt %s --string-lowering --remove-unused-module-elements -all -o %t.wasm
;; RUN: node %S/string-lowering.js %t.wasm | filecheck %s --check-prefix=CHECK-JS
;;
;; CHECK-JS: string: ["bar","foo","needs\tescaping\u0000."]
;; CHECK-JS: JSON: [ 'bar', 'foo', 'needs\tescaping\u0000.' ]