// Listing metadata: forked from LeenkxTeam/Kmake; 382 lines, 13 KiB, JavaScript.
// Copyright 2024 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Flags: --wasm-staging
// For {isOneByteString}:
// Flags: --expose-externalize-string

// Load the shared Wasm module-builder helpers (WasmModuleBuilder, makeSig,
// the kWasm* types and kExpr* opcodes used throughout this file).
d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");

// Non-nullable reference to an extern value ("ref extern").
let kRefExtern = wasmRefType(kWasmExternRef);

// Signature naming: we use "r" for nullable "externref", and "e" for
// non-nullable "ref extern"; "i" is i32 and "v" is void (no parameters).
let kSig_e_ii = makeSig([kWasmI32, kWasmI32], [kRefExtern]);
let kSig_e_v = makeSig([], [kRefExtern]);
let kSig_e_rii = makeSig([kWasmExternRef, kWasmI32, kWasmI32],
                         [kRefExtern]);
let kSig_e_r = makeSig([kWasmExternRef], [kRefExtern]);

// Strings exercised by every test below. They cover the empty string,
// one-byte and two-byte contents, a well-formed surrogate pair, and lone
// surrogates at the start, middle and end of a string.
let interestingStrings = [
  '',
  'ascii',
  'latin\xa91',        // Latin-1.
  '2 \ucccc b',        // Two-byte.
  'a \ud800\udc00 b',  // Proper surrogate pair.
  'a \ud800 b',        // Lone lead surrogate.
  'a \udc00 b',        // Lone trail surrogate.
  '\ud800 bc',         // Lone lead surrogate at the start.
  '\udc00 bc',         // Lone trail surrogate at the start.
  'ab \ud800',         // Lone lead surrogate at the end.
  'ab \udc00',         // Lone trail surrogate at the end.
  'a \udc00\ud800 b',  // Swapped surrogate pair.
];

// Returns true if {codepoint} (a number) lies in the UTF-16 surrogate range
// U+D800..U+DFFF, i.e. it is a lead or trail surrogate code unit.
function IsSurrogate(codepoint) {
  return 0xD800 <= codepoint && codepoint <= 0xDFFF;
}

// Returns true if {str} contains at least one unpaired surrogate.
// The string iterator yields a proper surrogate pair as a single element
// whose code point is above U+FFFF, so any element whose code point is still
// in the surrogate range must be a lone surrogate.
function HasIsolatedSurrogate(str) {
  for (const char of str) {
    if (IsSurrogate(char.codePointAt(0))) return true;
  }
  return false;
}

// Returns a copy of {str} in which every unpaired surrogate is replaced by
// {replacement} (U+FFFD by default). Proper surrogate pairs and all other
// characters are kept unchanged.
function ReplaceIsolatedSurrogates(str, replacement='\ufffd') {
  let replaced = '';
  // Iterating the string yields whole code points, so a surrogate seen here
  // is necessarily a lone one.
  for (const char of str) {
    replaced += IsSurrogate(char.codePointAt(0)) ? replacement : char;
  }
  return replaced;
}

// Type and import indices of the module under construction. They are
// (re)assigned by every call to MakeBuilder() and read by the test bodies.
let kArrayI8;
let kStringFromUtf8Array;
let kStringIntoUtf8Array;
let kStringToUtf8Array;
let kStringMeasureUtf8;

// Creates a fresh WasmModuleBuilder that declares the i8 array type and
// imports the four UTF-8 string builtins from "wasm:text-decoder" and
// "wasm:text-encoder". The resulting type/import indices are stored in the
// file-level k* variables. Returns the builder so that callers can add their
// own functions before instantiating.
function MakeBuilder() {
  let builder = new WasmModuleBuilder();
  // The array type is declared inside its own recursion group.
  builder.startRecGroup();
  kArrayI8 = builder.addArray(kWasmI8, true, kNoSuperType, true);
  builder.endRecGroup();
  let array8ref = wasmRefNullType(kArrayI8);

  // (array, start, end) -> string
  kStringFromUtf8Array = builder.addImport(
      'wasm:text-decoder', 'decodeStringFromUTF8Array',
      makeSig([array8ref, kWasmI32, kWasmI32], [kRefExtern]));
  // (string) -> i32
  kStringMeasureUtf8 =
      builder.addImport('wasm:text-encoder', 'measureStringAsUTF8', kSig_i_r);
  // (string, array, start) -> i32
  kStringIntoUtf8Array = builder.addImport(
      'wasm:text-encoder', 'encodeStringIntoUTF8Array',
      makeSig([kWasmExternRef, array8ref, kWasmI32], [kWasmI32]));
  // (string) -> non-null array
  kStringToUtf8Array = builder.addImport(
      'wasm:text-encoder', 'encodeStringToUTF8Array',
      makeSig([kWasmExternRef], [wasmRefType(kArrayI8)]));

  return builder;
}

// Arguments passed to builder.instantiate() by every test: an empty import
// object, plus the compile options naming the builtin sets to enable.
let kImports = {};
let kBuiltins = { builtins: ["text-decoder", "text-encoder"] };

// Encodes {str} as WTF-8 and returns the bytes as a plain array of numbers.
// This is UTF-8 except that lone surrogates are not rejected or replaced:
// they are emitted as ordinary three-byte sequences.
function encodeWtf8(str) {
  const out = [];
  // String iterator coalesces surrogate pairs, so each element is either a
  // full code point or a lone surrogate.
  for (const char of str) {
    const codepoint = char.codePointAt(0);
    if (codepoint <= 0x7f) {
      // One byte: 0xxxxxxx.
      out.push(codepoint);
    } else if (codepoint <= 0x7ff) {
      // Two bytes: 110xxxxx 10xxxxxx.
      out.push(0xc0 | (codepoint >> 6));
      out.push(0x80 | (codepoint & 0x3f));
    } else if (codepoint <= 0xffff) {
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx (includes lone surrogates).
      out.push(0xe0 | (codepoint >> 12));
      out.push(0x80 | ((codepoint >> 6) & 0x3f));
      out.push(0x80 | (codepoint & 0x3f));
    } else if (codepoint <= 0x10ffff) {
      // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
      out.push(0xf0 | (codepoint >> 18));
      out.push(0x80 | ((codepoint >> 12) & 0x3f));
      out.push(0x80 | ((codepoint >> 6) & 0x3f));
      out.push(0x80 | (codepoint & 0x3f));
    } else {
      throw new Error("bad codepoint " + codepoint);
    }
  }
  return out;
}

// Builds the byte contents of one data segment holding all test inputs.
//
// Returns {valid, invalid, data}:
//  - valid:   maps each entry of interestingStrings to the {offset, length}
//             of its WTF-8 encoding inside {data}.
//  - invalid: maps each malformed byte string (one char per byte) to
//             {offset, length, replaced}, where {replaced} is the string the
//             tests expect the decoder to produce for those bytes.
//  - data:    a Uint8Array with all encodings laid out back to back.
function makeWtf8TestDataSegment() {
  const data = [];
  const valid = {};
  const invalid = {};

  for (const str of interestingStrings) {
    const bytes = encodeWtf8(str);
    valid[str] = { offset: data.length, length: bytes.length };
    for (const byte of bytes) {
      data.push(byte);
    }
  }

  // Each pair is [raw bytes, expected decoding]. The raw bytes are written as
  // a string whose char codes are the byte values. Keeping input and
  // expectation side by side prevents the two lists from drifting apart.
  const invalid_cases = [
    ['trailing high byte \xa9',
     'trailing high byte \uFFFD'],
    ['interstitial high \xa9 byte',
     'interstitial high \uFFFD byte'],
    ['invalid \xc0 byte',
     'invalid \uFFFD byte'],
    ['invalid three-byte \xed\xd0\x80',
     'invalid three-byte \uFFFD\u0400'],
    ['surrogate \xed\xa0\x80\xed\xb0\x80 pair',
     'surrogate \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD pair'],
  ];
  for (const [bytes, replaced] of invalid_cases) {
    invalid[bytes] = {
      offset: data.length,
      length: bytes.length,
      replaced: replaced
    };
    for (let pos = 0; pos < bytes.length; pos++) {
      data.push(bytes.charCodeAt(pos));
    }
  }
  return { valid, invalid, data: Uint8Array.from(data) };
}

// Tests decodeStringFromUTF8Array: decoding of valid and malformed byte
// ranges, bounds checking of the (start, end) range, and the null-array trap.
(function TestStringNewUtf8Array() {
  print(arguments.callee.name);
  let builder = MakeBuilder();
  let data = makeWtf8TestDataSegment();
  let data_index = builder.addPassiveDataSegment(data.data);

  let ascii_data_index =
      builder.addPassiveDataSegment(Uint8Array.from(encodeWtf8("ascii")));

  // Helper: materializes the whole test data segment as an i8 array.
  let make_i8_array = builder.addFunction(
      "make_i8_array", makeSig([], [wasmRefType(kArrayI8)]))
    .addBody([
      ...wasmI32Const(0),
      ...wasmI32Const(data.data.length),
      kGCPrefix, kExprArrayNewData, kArrayI8, data_index
    ]).index;

  // (start, end) -> string decoded from that range of the test data.
  builder.addFunction("new_utf8", kSig_e_ii)
    .exportFunc()
    .addBody([
      kExprCallFunction, make_i8_array,
      kExprLocalGet, 0, kExprLocalGet, 1,
      kExprCallFunction, kStringFromUtf8Array,
    ]);

  // (start, end) -> string decoded from that range of an array that holds
  // exactly the bytes of "ascii".
  builder.addFunction("bounds_check", kSig_e_ii)
    .exportFunc()
    .addBody([
      ...wasmI32Const(0),
      ...wasmI32Const("ascii".length),
      kGCPrefix, kExprArrayNewData, kArrayI8, ascii_data_index,
      kExprLocalGet, 0, kExprLocalGet, 1,
      kExprCallFunction, kStringFromUtf8Array,
    ]);

  // Calls the decoder with a null array reference.
  builder.addFunction("null_array", kSig_e_v).exportFunc()
    .addBody([
      kExprRefNull, kArrayI8,
      kExprI32Const, 0, kExprI32Const, 0,
      kExprCallFunction, kStringFromUtf8Array,
    ]);

  let instance = builder.instantiate(kImports, kBuiltins);
  for (let [str, {offset, length}] of Object.entries(data.valid)) {
    let start = offset;
    let end = offset + length;
    if (HasIsolatedSurrogate(str)) {
      // Isolated surrogates have the three-byte pattern ED [A0,BF] [80,BF].
      // When the sloppy decoder gets to the second byte, it will reject
      // the sequence, and then retry parsing at the second byte.
      // Seeing the second byte can't start a sequence, it replaces the
      // second byte and continues with the next, which also can't start
      // a sequence. The result is that one isolated surrogate is replaced
      // by three U+FFFD codepoints.
      assertEquals(ReplaceIsolatedSurrogates(str, '\ufffd\ufffd\ufffd'),
                   instance.exports.new_utf8(start, end));
    } else {
      assertEquals(str, instance.exports.new_utf8(start, end));
    }
  }
  // Malformed inputs decode to their precomputed replacement strings. The
  // raw byte string (the key) is not needed here, only the entry values.
  for (let {offset, length, replaced} of Object.values(data.invalid)) {
    let start = offset;
    let end = offset + length;
    assertEquals(replaced, instance.exports.new_utf8(start, end));
  }

  assertEquals("ascii", instance.exports.bounds_check(0, "ascii".length));
  assertEquals("", instance.exports.bounds_check("ascii".length,
                                                 "ascii".length));
  assertEquals("i", instance.exports.bounds_check("ascii".length - 1,
                                                  "ascii".length));
  assertThrows(() => instance.exports.bounds_check(0, 100),
               WebAssembly.RuntimeError, "array element access out of bounds");
  assertThrows(() => instance.exports.bounds_check(0, -1),
               WebAssembly.RuntimeError, "array element access out of bounds");
  assertThrows(() => instance.exports.bounds_check(-1, 0),
               WebAssembly.RuntimeError, "array element access out of bounds");
  assertThrows(() => instance.exports.bounds_check("ascii".length,
                                                   "ascii".length + 1),
               WebAssembly.RuntimeError, "array element access out of bounds");
  assertThrows(() => instance.exports.null_array(),
               WebAssembly.RuntimeError, "dereferencing a null pointer");
})();

// Tests measureStringAsUTF8: the reported length must equal the length of
// the WTF-8 encoding computed in JS, and a null string must trap.
(function TestStringMeasureUtf8() {
  print(arguments.callee.name);
  let builder = MakeBuilder();

  // (string) -> i32, forwards directly to the builtin.
  builder.addFunction("string_measure_utf8", kSig_i_r)
    .exportFunc()
    .addBody([
      kExprLocalGet, 0,
      kExprCallFunction, kStringMeasureUtf8,
    ]);

  // Calls the builtin with a null externref.
  builder.addFunction("string_measure_utf8_null", kSig_i_v)
    .exportFunc()
    .addBody([
      kExprRefNull, kExternRefCode,
      kExprCallFunction, kStringMeasureUtf8,
    ]);

  let instance = builder.instantiate(kImports, kBuiltins);
  for (let str of interestingStrings) {
    let wtf8 = encodeWtf8(str);
    assertEquals(wtf8.length, instance.exports.string_measure_utf8(str));
  }

  assertThrows(() => instance.exports.string_measure_utf8_null(),
               WebAssembly.RuntimeError, "illegal cast");
})();

// Tests encodeStringIntoUTF8Array by round-tripping: encode a string into a
// freshly allocated array, decode the written range again, and compare with
// the input (with isolated surrogates replaced by U+FFFD). Also checks the
// null-string, null-array and out-of-bounds traps.
(function TestStringEncodeUtf8Array() {
  print(arguments.callee.name);
  let builder = MakeBuilder();

  // Allocate an array that's exactly the expected size, and encode
  // into it.  Then decode it.
  // (str, length, offset=0) -> str
  // Locals: 0 = str, 1 = length, 2 = offset, 3 = buffer, 4 = bytes written.
  builder.addFunction("encode_utf8", kSig_e_rii)
    .exportFunc()
    .addLocals(wasmRefNullType(kArrayI8), 1)
    .addLocals(kWasmI32, 1)
    .addBody([
      // Allocate buffer.
      kExprLocalGet, 1,
      kGCPrefix, kExprArrayNewDefault, kArrayI8,
      kExprLocalSet, 3,

      // Write buffer, store number of bytes written.
      kExprLocalGet, 0,
      kExprLocalGet, 3,
      kExprLocalGet, 2,
      kExprCallFunction, kStringIntoUtf8Array,
      kExprLocalSet, 4,

      // Read buffer.
      kExprLocalGet, 3,
      kExprLocalGet, 2,
      kExprLocalGet, 2, kExprLocalGet, 4, kExprI32Add,
      kExprCallFunction, kStringFromUtf8Array,
    ]);


  // Encodes a null string into a valid (empty) array.
  builder.addFunction("encode_null_string", kSig_i_v)
    .exportFunc()
    .addBody([
        kExprRefNull, kExternRefCode,
        kExprI32Const, 0, kGCPrefix, kExprArrayNewDefault, kArrayI8,
        kExprI32Const, 0,
        kExprCallFunction, kStringIntoUtf8Array,
      ]);
  // Encodes a valid string (decoded from an empty array) into a null array.
  builder.addFunction("encode_null_array", kSig_i_v)
    .exportFunc()
    .addBody([
        kExprI32Const, 0, kGCPrefix, kExprArrayNewDefault, kArrayI8,
        kExprI32Const, 0, kExprI32Const, 0,
        kExprCallFunction, kStringFromUtf8Array,
        kExprRefNull, kArrayI8,
        kExprI32Const, 0,
        kExprCallFunction, kStringIntoUtf8Array,
      ]);

  let instance = builder.instantiate(kImports, kBuiltins);

  for (let str of interestingStrings) {
    let replaced = ReplaceIsolatedSurrogates(str);
    if (!HasIsolatedSurrogate(str)) assertEquals(str, replaced);
    let wtf8 = encodeWtf8(replaced);
    // Exactly-sized buffer, written from the start.
    assertEquals(replaced,
                 instance.exports.encode_utf8(str, wtf8.length, 0));
    // Oversized buffer, written at a non-zero offset.
    assertEquals(replaced,
                 instance.exports.encode_utf8(str, wtf8.length + 20, 10));
  }

  assertThrows(() => instance.exports.encode_null_array(),
               WebAssembly.RuntimeError, "dereferencing a null pointer");
  assertThrows(() => instance.exports.encode_null_string(),
               WebAssembly.RuntimeError, "illegal cast");

  // Writing at offset 1 into a buffer of exactly the encoded size does not
  // fit and must trap.
  for (let str of interestingStrings) {
    let wtf8 = encodeWtf8(str);
    let message = "array element access out of bounds";
    assertThrows(() => instance.exports.encode_utf8(str, wtf8.length, 1),
                 WebAssembly.RuntimeError, message);
  }
})();

// Tests encodeStringToUTF8Array by round-tripping: convert a string to a
// newly created array, decode the whole array again, and compare with the
// input (with isolated surrogates replaced by U+FFFD). Also checks that a
// null string traps.
(function TestStringToUtf8Array() {
  print(arguments.callee.name);
  let builder = MakeBuilder();

  // Convert the string to an array, then decode it back.
  // (str) -> str
  builder.addFunction("encode_utf8", kSig_e_r)
    .exportFunc()
    .addLocals(wasmRefNullType(kArrayI8), 1)
    .addBody([
      kExprLocalGet, 0,
      kExprCallFunction, kStringToUtf8Array,
      kExprLocalTee, 1,

      kExprI32Const, 0,  // start
      kExprLocalGet, 1, kGCPrefix, kExprArrayLen,  // end
      kExprCallFunction, kStringFromUtf8Array,
    ]);

  // Calls the builtin with a null externref.
  let sig_a8_v = makeSig([], [wasmRefType(kArrayI8)]);
  builder.addFunction("encode_null_string", sig_a8_v)
    .exportFunc()
    .addBody([
        kExprRefNull, kExternRefCode,
        kExprCallFunction, kStringToUtf8Array,
      ]);

  let instance = builder.instantiate(kImports, kBuiltins);

  for (let str of interestingStrings) {
    let replaced = ReplaceIsolatedSurrogates(str);
    if (!HasIsolatedSurrogate(str)) assertEquals(str, replaced);
    // "encode_utf8" takes only the string here (kSig_e_r); there is no
    // buffer length or offset to vary, so a single call covers this case.
    assertEquals(replaced, instance.exports.encode_utf8(str));
  }

  assertThrows(() => instance.exports.encode_null_string(),
               WebAssembly.RuntimeError, "illegal cast");
})();