get rid of multiple valueviews

anonrig · anonrig · commit 3a6ea7663c80 · 2025-11-03T13:00:07.000-05:00
diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
@@ -467,108 +467,77 @@ jsg::Ref<TextEncoder> TextEncoder::constructor(jsg::Lock& js) {
 
 jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input) {
   jsg::JsString str = input.orDefault(js.str());
+  std::shared_ptr<v8::BackingStore> backingStore;
+  size_t utf8_length = 0;
 
+  // Fast path: one-byte (Latin-1) strings are encoded without creating a ValueView.
   if (str.isOneByte(js)) {
     auto length = str.length(js);
-    // Fast path for one-byte strings (Latin-1). writeOneByte() copies the raw bytes without
-    // flattening the string, which is more efficient than using ValueView. Note that we
-    // allocate `length * 2` bytes because Latin-1 characters 0x80-0xFF need 2 bytes in UTF-8.
-    auto backing =
-        jsg::BackingStore::alloc<v8::Uint8Array>(js, length, jsg::Lock::AllocOption::UNINITIALIZED);
-    str.writeOneByte(
-        js, backing.asArrayPtr<kj::byte>(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
-    auto backingData = reinterpret_cast<const char*>(backing.asArrayPtr<kj::byte>().begin());
+    // Allocate buffer for Latin-1. Use v8::ArrayBuffer::NewBackingStore to avoid creating
+    // JS objects during conversion.
+    backingStore = v8::ArrayBuffer::NewBackingStore(js.v8Isolate, length);
+    auto backingData = reinterpret_cast<kj::byte*>(backingStore->Data());
+
+    str.writeOneByte(js, kj::ArrayPtr<kj::byte>(backingData, length),
+        jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
 
-    size_t utf8_length = simdutf::utf8_length_from_latin1(backingData, length);
+    auto latin1 = reinterpret_cast<const char*>(backingData);
+    utf8_length = simdutf::utf8_length_from_latin1(latin1, length);
 
     if (utf8_length == length) {
-      return jsg::JsUint8Array(backing.createHandle(js).As<v8::Uint8Array>());
+      // ASCII fast path: equal lengths mean no byte needs 2 UTF-8 bytes; reuse the buffer as-is.
+      auto array = v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, backingStore), 0, length);
+      return jsg::JsUint8Array(array);
     }
 
-    auto backing2 = jsg::BackingStore::alloc<v8::Uint8Array>(
-        js, utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
-    auto written = simdutf::convert_latin1_to_utf8(
-        backingData, length, reinterpret_cast<char*>(backing2.asArrayPtr<kj::byte>().begin()));
-    KJ_DASSERT(backing2.size() == written);
-    return jsg::JsUint8Array(backing2.createHandle(js).As<v8::Uint8Array>());
+    // Non-ASCII Latin-1: transcode to UTF-8. `written` is only read by the KJ_DASSERT below.
+    std::shared_ptr<v8::BackingStore> backingStore2 =
+        v8::ArrayBuffer::NewBackingStore(js.v8Isolate, utf8_length);
+    [[maybe_unused]] auto written = simdutf::convert_latin1_to_utf8(
+        latin1, length, reinterpret_cast<char*>(backingStore2->Data()));
+    KJ_DASSERT(utf8_length == written);
+    auto array =
+        v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, backingStore2), 0, utf8_length);
+    return jsg::JsUint8Array(array);
   }
 
-  // First pass: Calculate the required UTF-8 output buffer size.
-  // We need to do this in a separate ValueView because:
-  // 1. ValueView holds the V8 heap lock, which prevents us from allocating new V8 objects
-  // 2. We must determine the exact output size before allocating the BackingStore
-  // 3. Once we know the size, we'll create a second ValueView to do the actual conversion
-  size_t utf8_length = 0;
-  bool isValidUtf16 = true;
-  // For invalid UTF-16 strings (with unpaired surrogates), we need to fix them to well-formed
-  // UTF-16 before calculating the UTF-8 length. We store the fixed version here so it can be
-  // reused in the second pass, avoiding the need to fix it twice.
-  kj::Array<char16_t> wellFormed;
-
+  // Two-byte path: validate, size, allocate and convert inside a single ValueView scope.
   {
+    // Note: constructing the ValueView flattens the string if it is not already flat.
     v8::String::ValueView view(js.v8Isolate, str);
-    // One-byte strings are handled by the fast path above
-    KJ_DASSERT(!view.is_one_byte());
-
-    auto data = reinterpret_cast<const char16_t*>(view.data16());
     // Two-byte string path. V8 uses UTF-16LE encoding internally for strings with code points
     // > U+00FF. Check if the UTF-16 is valid (no unpaired surrogates) to determine the path.
-    isValidUtf16 = simdutf::validate_utf16le(data, view.length());
+    auto data = reinterpret_cast<const char16_t*>(view.data16());
+    bool isValidUtf16 = simdutf::validate_utf16le(data, view.length());
 
     if (isValidUtf16) {
-      // Common case: valid UTF-16, calculate UTF-8 length directly
+      // Common case: valid UTF-16, size the buffer and convert directly to UTF-8
       utf8_length = simdutf::utf8_length_from_utf16le(data, view.length());
+      backingStore = v8::ArrayBuffer::NewBackingStore(js.v8Isolate, utf8_length);
+      [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8(
+          data, view.length(), reinterpret_cast<char*>(backingStore->Data()));
+      KJ_DASSERT(written == utf8_length);
     } else {
       // Rare case: Invalid UTF-16 with unpaired surrogates. Per the Encoding Standard,
       // unpaired surrogates must be replaced with U+FFFD (replacement character).
       // U+FFFD is 3 bytes in UTF-8, which means the UTF-8 length will differ from what
       // we'd calculate from the invalid UTF-16. We must fix the UTF-16 first, then
       // calculate the UTF-8 length from the well-formed version to get the correct size.
-      wellFormed = kj::heapArray<char16_t>(view.length());
+      auto wellFormed = kj::heapArray<char16_t>(view.length());
       simdutf::to_well_formed_utf16le(data, view.length(), wellFormed.begin());
       utf8_length = simdutf::utf8_length_from_utf16le(wellFormed.begin(), view.length());
+      backingStore = v8::ArrayBuffer::NewBackingStore(js.v8Isolate, utf8_length);
+      [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8(
+          wellFormed.begin(), wellFormed.size(), reinterpret_cast<char*>(backingStore->Data()));
+      KJ_DASSERT(written == utf8_length);
     }
   }  // ValueView destroyed here, releasing the heap lock
 
-  // Pre-allocate the jsg::BackingStore to avoid the copy overhead that would occur with
-  // BackingStore::from() in the v8 sandbox, since from() copies data when it's not already in the
-  // sandbox. By pre-allocating with alloc(), the memory is already in the sandbox and we can
-  // perform the conversion directly into it.
-  auto backing = jsg::BackingStore::alloc<v8::Uint8Array>(
-      js, utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
-
-  // Second pass: Perform the actual UTF-8 conversion.
-  // We create a new ValueView here to access the string data again, now that we have a
-  // pre-allocated output buffer. The closure ensures the ValueView is destroyed before we
-  // return the result, which is important for proper V8 heap management.
-  [&]() {
-    v8::String::ValueView view(js.v8Isolate, str);
-    // One-byte strings are handled by the fast path above
-    KJ_DASSERT(!view.is_one_byte());
-
-    size_t length = static_cast<size_t>(view.length());
-    auto* output = backing.asArrayPtr<char>().begin();
-    auto data = reinterpret_cast<const char16_t*>(view.data16());
-
-    if (isValidUtf16) {
-      // Common case: valid UTF-16LE, convert directly to UTF-8
-      [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8(data, length, output);
-      KJ_DASSERT(written == backing.size());
-      return;
-    }
-
-    // Rare case: Invalid UTF-16LE with unpaired surrogates. We already fixed the UTF-16 to
-    // well-formed in the first pass (stored in wellFormed array), so now we just convert that
-    // fixed version to UTF-8. This reuses the wellFormed array created earlier, avoiding the
-    // need to fix the UTF-16 a second time.
-    [[maybe_unused]] auto written =
-        simdutf::convert_utf16le_to_utf8(wellFormed.begin(), wellFormed.size(), output);
-    KJ_DASSERT(written == backing.size());
-  }();  // ValueView destroyed here, releasing the heap lock
-
   // Now that ValueView is destroyed and the heap lock is released, it's safe to create V8 objects.
-  // Create the Uint8Array from the BackingStore and return it to JS.
-  return jsg::JsUint8Array(backing.createHandle(js).As<v8::Uint8Array>());
+  // Wrap the raw v8::BackingStore in an ArrayBuffer and expose utf8_length bytes of it.
+  auto array =
+      v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, backingStore), 0, utf8_length);
+  return jsg::JsUint8Array(array);
 }
 
 TextEncoder::EncodeIntoResult TextEncoder::encodeInto(