@@ -467,108 +467,77 @@ jsg::Ref<TextEncoder> TextEncoder::constructor(jsg::Lock& js) {
467467
468468jsg::JsUint8Array TextEncoder::encode (jsg::Lock& js, jsg::Optional<jsg::JsString> input) {
469469 jsg::JsString str = input.orDefault (js.str ());
470+ std::shared_ptr<v8::BackingStore> backingStore;
471+ size_t utf8_length = 0 ;
470472
473+ // Fast path: check if string is one-byte before creating ValueView
471474 if (str.isOneByte (js)) {
472475 auto length = str.length (js);
473- // Fast path for one-byte strings (Latin-1). writeOneByte() copies the raw bytes without
474- // flattening the string, which is more efficient than using ValueView. Note that we
475- // allocate `length * 2` bytes because Latin-1 characters 0x80-0xFF need 2 bytes in UTF-8.
476- auto backing =
477- jsg::BackingStore::alloc<v8::Uint8Array>(js, length, jsg::Lock::AllocOption::UNINITIALIZED);
478- str.writeOneByte (
479- js, backing.asArrayPtr <kj::byte>(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
480- auto backingData = reinterpret_cast <const char *>(backing.asArrayPtr <kj::byte>().begin ());
476+ // Allocate buffer for Latin-1. Use v8::ArrayBuffer::NewBackingStore to avoid creating
477+ // JS objects during conversion.
478+ backingStore = v8::ArrayBuffer::NewBackingStore (js.v8Isolate , length);
479+ auto backingData = reinterpret_cast <kj::byte*>(backingStore->Data ());
480+
481+ str.writeOneByte (js, kj::ArrayPtr<kj::byte>(backingData, length),
482+ jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
481483
482- size_t utf8_length = simdutf::utf8_length_from_latin1 (backingData, length);
484+ utf8_length =
485+ simdutf::utf8_length_from_latin1 (reinterpret_cast <const char *>(backingData), length);
483486
484487 if (utf8_length == length) {
485- return jsg::JsUint8Array (backing.createHandle (js).As <v8::Uint8Array>());
488+ // ASCII fast path: no conversion needed, Latin-1 is same as UTF-8 for ASCII
489+ auto array = v8::Uint8Array::New (v8::ArrayBuffer::New (js.v8Isolate , backingStore), 0 , length);
490+ return jsg::JsUint8Array (array);
486491 }
487492
488- auto backing2 = jsg::BackingStore::alloc<v8::Uint8Array>(
489- js, utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
490- auto written = simdutf::convert_latin1_to_utf8 (
491- backingData, length, reinterpret_cast <char *>(backing2.asArrayPtr <kj::byte>().begin ()));
492- KJ_DASSERT (backing2.size () == written);
493- return jsg::JsUint8Array (backing2.createHandle (js).As <v8::Uint8Array>());
493+ // Need to convert Latin-1 to UTF-8
494+ std::shared_ptr<v8::BackingStore> backingStore2 =
495+ v8::ArrayBuffer::NewBackingStore (js.v8Isolate , utf8_length);
496+ auto written = simdutf::convert_latin1_to_utf8 (reinterpret_cast <const char *>(backingData),
497+ length, reinterpret_cast <char *>(backingStore2->Data ()));
498+ KJ_DASSERT (utf8_length == written);
499+ auto array =
500+ v8::Uint8Array::New (v8::ArrayBuffer::New (js.v8Isolate , backingStore2), 0 , utf8_length);
501+ return jsg::JsUint8Array (array);
494502 }
495503
496- // First pass: Calculate the required UTF-8 output buffer size.
497- // We need to do this in a separate ValueView because:
498- // 1. ValueView holds the V8 heap lock, which prevents us from allocating new V8 objects
499- // 2. We must determine the exact output size before allocating the BackingStore
500- // 3. Once we know the size, we'll create a second ValueView to do the actual conversion
501- size_t utf8_length = 0 ;
502- bool isValidUtf16 = true ;
503- // For invalid UTF-16 strings (with unpaired surrogates), we need to fix them to well-formed
504- // UTF-16 before calculating the UTF-8 length. We store the fixed version here so it can be
505- // reused in the second pass, avoiding the need to fix it twice.
506- kj::Array<char16_t > wellFormed;
507-
504+ // Two-byte string path
508505 {
506+ // Note that ValueView flattens the string, if it's not already flattened
509507 v8::String::ValueView view (js.v8Isolate , str);
510- // One-byte strings are handled by the fast path above
511- KJ_DASSERT (!view.is_one_byte ());
512-
513- auto data = reinterpret_cast <const char16_t *>(view.data16 ());
514508 // Two-byte string path. V8 uses UTF-16LE encoding internally for strings with code points
515509 // > U+00FF. Check if the UTF-16 is valid (no unpaired surrogates) to determine the path.
516- isValidUtf16 = simdutf::validate_utf16le (data, view.length ());
510+ auto data = reinterpret_cast <const char16_t *>(view.data16 ());
511+ bool isValidUtf16 = simdutf::validate_utf16le (data, view.length ());
517512
518513 if (isValidUtf16) {
519- // Common case: valid UTF-16, calculate UTF-8 length directly
514+ // Common case: valid UTF-16, convert directly to UTF-8
520515 utf8_length = simdutf::utf8_length_from_utf16le (data, view.length ());
516+ backingStore = v8::ArrayBuffer::NewBackingStore (js.v8Isolate , utf8_length);
517+ [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8 (
518+ data, view.length (), reinterpret_cast <char *>(backingStore->Data ()));
519+ KJ_DASSERT (written == utf8_length);
521520 } else {
522521 // Rare case: Invalid UTF-16 with unpaired surrogates. Per the Encoding Standard,
523522 // unpaired surrogates must be replaced with U+FFFD (replacement character).
524523 // U+FFFD is 3 bytes in UTF-8, which means the UTF-8 length will differ from what
525524 // we'd calculate from the invalid UTF-16. We must fix the UTF-16 first, then
526525 // calculate the UTF-8 length from the well-formed version to get the correct size.
527- wellFormed = kj::heapArray<char16_t >(view.length ());
526+ auto wellFormed = kj::heapArray<char16_t >(view.length ());
528527 simdutf::to_well_formed_utf16le (data, view.length (), wellFormed.begin ());
529528 utf8_length = simdutf::utf8_length_from_utf16le (wellFormed.begin (), view.length ());
529+ backingStore = v8::ArrayBuffer::NewBackingStore (js.v8Isolate , utf8_length);
530+ [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8 (
531+ wellFormed.begin (), wellFormed.size (), reinterpret_cast <char *>(backingStore->Data ()));
532+ KJ_DASSERT (written == utf8_length);
530533 }
531534 } // ValueView destroyed here, releasing the heap lock
532535
533- // Pre-allocate the jsg::BackingStore to avoid the copy overhead that would occur with
534- // BackingStore::from() in the v8 sandbox, since from() copies data when it's not already in the
535- // sandbox. By pre-allocating with alloc(), the memory is already in the sandbox and we can
536- // perform the conversion directly into it.
537- auto backing = jsg::BackingStore::alloc<v8::Uint8Array>(
538- js, utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
539-
540- // Second pass: Perform the actual UTF-8 conversion.
541- // We create a new ValueView here to access the string data again, now that we have a
542- // pre-allocated output buffer. The closure ensures the ValueView is destroyed before we
543- // return the result, which is important for proper V8 heap management.
544- [&]() {
545- v8::String::ValueView view (js.v8Isolate , str);
546- // One-byte strings are handled by the fast path above
547- KJ_DASSERT (!view.is_one_byte ());
548-
549- size_t length = static_cast <size_t >(view.length ());
550- auto * output = backing.asArrayPtr <char >().begin ();
551- auto data = reinterpret_cast <const char16_t *>(view.data16 ());
552-
553- if (isValidUtf16) {
554- // Common case: valid UTF-16LE, convert directly to UTF-8
555- [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8 (data, length, output);
556- KJ_DASSERT (written == backing.size ());
557- return ;
558- }
559-
560- // Rare case: Invalid UTF-16LE with unpaired surrogates. We already fixed the UTF-16 to
561- // well-formed in the first pass (stored in wellFormed array), so now we just convert that
562- // fixed version to UTF-8. This reuses the wellFormed array created earlier, avoiding the
563- // need to fix the UTF-16 a second time.
564- [[maybe_unused]] auto written =
565- simdutf::convert_utf16le_to_utf8 (wellFormed.begin (), wellFormed.size (), output);
566- KJ_DASSERT (written == backing.size ());
567- }(); // ValueView destroyed here, releasing the heap lock
568-
569536 // Now that ValueView is destroyed and the heap lock is released, it's safe to create V8 objects.
570- // Create the Uint8Array from the BackingStore and return it to JS.
571- return jsg::JsUint8Array (backing.createHandle (js).As <v8::Uint8Array>());
537+ // Create the Uint8Array from the raw v8::BackingStore.
538+ auto array =
539+ v8::Uint8Array::New (v8::ArrayBuffer::New (js.v8Isolate , backingStore), 0 , utf8_length);
540+ return jsg::JsUint8Array (array);
572541}
573542
574543TextEncoder::EncodeIntoResult TextEncoder::encodeInto (
0 commit comments