v8/
string.rs

1use crate::Isolate;
2use crate::Local;
3use crate::String;
4use crate::binding::v8__String__kMaxLength;
5use crate::isolate::RealIsolate;
6use crate::scope::PinScope;
7use crate::support::Opaque;
8use crate::support::char;
9use crate::support::int;
10use crate::support::size_t;
11use std::borrow::Cow;
12use std::convert::TryInto;
13use std::default::Default;
14use std::ffi::c_void;
15use std::marker::PhantomData;
16use std::mem::MaybeUninit;
17use std::ptr::NonNull;
18use std::slice;
19
/// Transcodes a Latin-1 (ISO-8859-1) byte sequence into UTF-8.
///
/// ASCII bytes are copied through unchanged; every byte `>= 0x80` is expanded
/// into a two-byte UTF-8 sequence. The output can therefore be up to twice as
/// long as the input, which is why the output buffer needs `2 * input_length`
/// bytes of capacity.
///
/// Returns the number of bytes written to `outbuf`.
///
/// # Safety
///
/// - `inbuf` must point to at least `input_length` readable bytes.
/// - `outbuf` must point to at least `2 * input_length` writable bytes.
#[inline(always)]
pub unsafe fn latin1_to_utf8(
  input_length: usize,
  inbuf: *const u8,
  outbuf: *mut u8,
) -> usize {
  // High bit of every byte lane; any set bit means a non-ASCII byte.
  const NON_ASCII_MASK: u64 = 0x8080_8080_8080_8080;
  // Number of bytes examined per fast-path step.
  const WORD: usize = 8;

  let mut read = 0usize;
  let mut written = 0usize;

  while read < input_length {
    let remaining = input_length - read;

    if remaining >= WORD {
      // SAFETY: at least `WORD` readable bytes remain at `inbuf + read`.
      let word = unsafe { inbuf.add(read).cast::<u64>().read_unaligned() };
      if word & NON_ASCII_MASK == 0 {
        // Pure ASCII word: copy all eight bytes with one unaligned store.
        // SAFETY: `written <= 2 * read` and `read + 8 <= input_length`, so
        // `written + 8` stays within the `2 * input_length` capacity.
        unsafe { outbuf.add(written).cast::<u64>().write_unaligned(word) };
        read += WORD;
        written += WORD;
        continue;
      }
    }

    // Slow path: either the word contains a non-ASCII byte, or fewer than
    // eight bytes are left. Transcode up to one word's worth byte by byte.
    let stop = read + remaining.min(WORD);
    while read < stop {
      // SAFETY: `read < input_length`, and each input byte yields at most
      // two output bytes, which the caller-provided capacity covers.
      unsafe {
        let byte = inbuf.add(read).read();
        if byte < 0x80 {
          outbuf.add(written).write(byte);
          written += 1;
        } else {
          // Latin-1 code point U+0080..=U+00FF as a two-byte UTF-8 sequence.
          outbuf.add(written).write((byte >> 6) | 0b1100_0000);
          outbuf
            .add(written + 1)
            .write((byte & 0b0011_1111) | 0b1000_0000);
          written += 2;
        }
      }
      read += 1;
    }
  }

  written
}
84
// Raw foreign-function declarations backing the `String` API in this file.
// The definitions live outside this file (presumably in the crate's C++
// binding layer — confirm); the safe wrappers below are responsible for
// upholding whatever pointer and length requirements the C++ side has.
unsafe extern "C" {
  // --- Construction -------------------------------------------------------

  fn v8__String__Empty(isolate: *mut RealIsolate) -> *const String;

  // `length` is a C `int`; callers convert from `usize` and bail out when the
  // value does not fit.
  fn v8__String__NewFromUtf8(
    isolate: *mut RealIsolate,
    data: *const char,
    new_type: NewStringType,
    length: int,
  ) -> *const String;

  fn v8__String__NewFromOneByte(
    isolate: *mut RealIsolate,
    data: *const u8,
    new_type: NewStringType,
    length: int,
  ) -> *const String;

  fn v8__String__NewFromTwoByte(
    isolate: *mut RealIsolate,
    data: *const u16,
    new_type: NewStringType,
    length: int,
  ) -> *const String;

  fn v8__String__Concat(
    isolate: *mut RealIsolate,
    left: *const String,
    right: *const String,
  ) -> *const String;

  // --- Length queries -----------------------------------------------------

  fn v8__String__Length(this: *const String) -> int;

  fn v8__String__Utf8Length(
    this: *const String,
    isolate: *mut RealIsolate,
  ) -> int;

  // --- Copying string contents into caller-provided buffers ---------------

  // `flags` carries the raw bits of `WriteFlags`.
  fn v8__String__Write_v2(
    this: *const String,
    isolate: *mut RealIsolate,
    offset: u32,
    length: u32,
    buffer: *mut u16,
    flags: int,
  );

  fn v8__String__WriteOneByte_v2(
    this: *const String,
    isolate: *mut RealIsolate,
    offset: u32,
    length: u32,
    buffer: *mut u8,
    flags: int,
  );

  // `processed_characters_return` may be null; the wrapper passes null when
  // the caller does not ask for the count.
  fn v8__String__WriteUtf8_v2(
    this: *const String,
    isolate: *mut RealIsolate,
    buffer: *mut char,
    capacity: size_t,
    flags: int,
    processed_characters_return: *mut size_t,
  ) -> int;

  // --- External string resources ------------------------------------------

  fn v8__String__GetExternalStringResource(
    this: *const String,
  ) -> *mut ExternalStringResource;
  // `encoding` is an out-parameter written by the callee.
  fn v8__String__GetExternalStringResourceBase(
    this: *const String,
    encoding: *mut Encoding,
  ) -> *mut ExternalStringResourceBase;

  fn v8__String__NewExternalOneByteConst(
    isolate: *mut RealIsolate,
    onebyte_const: *const OneByteConst,
  ) -> *const String;

  fn v8__String__NewExternalOneByteStatic(
    isolate: *mut RealIsolate,
    buffer: *const char,
    length: int,
  ) -> *const String;

  // `free` is the destructor invoked with the buffer pointer and length.
  fn v8__String__NewExternalOneByte(
    isolate: *mut RealIsolate,
    buffer: *mut char,
    length: size_t,
    free: unsafe extern "C" fn(*mut char, size_t),
  ) -> *const String;

  fn v8__String__NewExternalTwoByteStatic(
    isolate: *mut RealIsolate,
    buffer: *const u16,
    length: int,
  ) -> *const String;

  // `free` is the destructor invoked with the buffer pointer and length.
  fn v8__String__NewExternalTwoByte(
    isolate: *mut RealIsolate,
    buffer: *mut u16,
    length: size_t,
    free: unsafe extern "C" fn(*mut u16, size_t),
  ) -> *const String;

  // --- Representation predicates ------------------------------------------

  // Currently unused: `String::is_external` uses a fallback instead.
  #[allow(dead_code)]
  fn v8__String__IsExternal(this: *const String) -> bool;
  fn v8__String__IsExternalOneByte(this: *const String) -> bool;
  fn v8__String__IsExternalTwoByte(this: *const String) -> bool;
  #[allow(dead_code)]
  fn v8__String__IsOneByte(this: *const String) -> bool;
  fn v8__String__ContainsOnlyOneByte(this: *const String) -> bool;

  // --- ExternalOneByteStringResource accessors ----------------------------

  fn v8__ExternalOneByteStringResource__data(
    this: *const ExternalOneByteStringResource,
  ) -> *const char;
  fn v8__ExternalOneByteStringResource__length(
    this: *const ExternalOneByteStringResource,
  ) -> size_t;

  // --- String::ValueView ---------------------------------------------------

  // Constructs a `ValueView` in place inside `buf`.
  fn v8__String__ValueView__CONSTRUCT(
    buf: *mut ValueView,
    isolate: *mut RealIsolate,
    string: *const String,
  );
  fn v8__String__ValueView__DESTRUCT(this: *mut ValueView);
  fn v8__String__ValueView__is_one_byte(this: *const ValueView) -> bool;
  // Untyped pointer; presumably `u8` or `u16` elements depending on
  // `is_one_byte` — confirm against the `ValueView` wrapper.
  fn v8__String__ValueView__data(this: *const ValueView) -> *const c_void;
  fn v8__String__ValueView__length(this: *const ValueView) -> int;
}
212
/// Encoding reported for an external string resource.
///
/// A value of this type is written through an out-pointer by
/// `v8__String__GetExternalStringResourceBase`, so the discriminants must
/// match the C++ side exactly.
///
// NOTE(review): verify these discriminants against V8's `String::Encoding`
// (in particular the value used for two-byte strings). If C++ ever writes a
// value that is not listed here, reading the enum is undefined behavior.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[repr(C)]
pub enum Encoding {
  /// The encoding could not be determined.
  Unknown = 0x1,
  /// Two-byte (UTF-16 code unit) data.
  TwoByte = 0x2,
  /// One-byte (Latin-1) data.
  OneByte = 0x8,
}
220
/// Opaque handle type for an external string resource.
///
/// Only ever used behind a pointer (see
/// `String::get_external_string_resource`); it has no Rust-visible fields.
#[repr(C)]
pub struct ExternalStringResource(Opaque);

/// Opaque handle type for the base of an external string resource.
///
/// Only ever used behind a pointer (see
/// `String::get_external_string_resource_base`); it has no Rust-visible
/// fields.
#[repr(C)]
pub struct ExternalStringResourceBase(Opaque);
226
227#[repr(C)]
228/// An external, one-byte string resource.
229/// This corresponds with `v8::String::ExternalOneByteStringResource`.
230///
231/// Note: The data contained in a one-byte string resource is guaranteed to be
232/// Latin-1 data. It is not safe to assume that it is valid UTF-8, as Latin-1
233/// only has commonality with UTF-8 in the ASCII range and differs beyond that.
234pub struct ExternalOneByteStringResource(Opaque);
235
236impl ExternalOneByteStringResource {
237  /// Returns a pointer to the data owned by this resource.
238  /// This pointer is valid as long as the resource is alive.
239  /// The data is guaranteed to be Latin-1.
240  #[inline]
241  pub fn data(&self) -> *const char {
242    unsafe { v8__ExternalOneByteStringResource__data(self) }
243  }
244
245  /// Returns the length of the data owned by this resource.
246  #[inline]
247  pub fn length(&self) -> usize {
248    unsafe { v8__ExternalOneByteStringResource__length(self) }
249  }
250
251  /// Returns the data owned by this resource as a string slice.
252  /// The data is guaranteed to be Latin-1.
253  #[inline]
254  pub fn as_bytes(&self) -> &[u8] {
255    let len = self.length();
256    if len == 0 {
257      &[]
258    } else {
259      // SAFETY: We know this is Latin-1
260      unsafe { std::slice::from_raw_parts(self.data().cast(), len) }
261    }
262  }
263}
264
265/// A static ASCII string resource for usage in V8, created at build time.
266#[repr(C)]
267#[derive(Copy, Clone, Debug)]
268pub struct OneByteConst {
269  vtable: *const OneByteConstNoOp,
270  cached_data: *const char,
271  length: usize,
272}
273
274impl OneByteConst {
275  /// `const` function that returns this string as a string reference.
276  #[inline(always)]
277  pub const fn as_str(&self) -> &str {
278    if self.length == 0 {
279      ""
280    } else {
281      // SAFETY: We know this is ASCII and length > 0
282      unsafe {
283        std::str::from_utf8_unchecked(std::slice::from_raw_parts(
284          self.cached_data as _,
285          self.length,
286        ))
287      }
288    }
289  }
290}
291
292impl AsRef<str> for OneByteConst {
293  #[inline(always)]
294  fn as_ref(&self) -> &str {
295    self.as_str()
296  }
297}
298
299impl AsRef<[u8]> for OneByteConst {
300  #[inline(always)]
301  fn as_ref(&self) -> &[u8] {
302    self.as_str().as_bytes()
303  }
304}
305
306impl std::ops::Deref for OneByteConst {
307  type Target = str;
308  #[inline(always)]
309  fn deref(&self) -> &Self::Target {
310    self.as_ref()
311  }
312}
313
314// SAFETY: The vtable for OneByteConst is an immutable static and all
315// of the included functions are thread-safe, the cached_data pointer
316// is never changed and points to a static ASCII string, and the
317// length is likewise never changed. Thus, it is safe to share the
318// OneByteConst across threads. This means that multiple isolates
319// can use the same OneByteConst statics simultaneously.
320unsafe impl Sync for OneByteConst {}
321
// Callbacks installed into `ONE_BYTE_CONST_VTABLE`. They have C linkage
// because they are invoked through the hand-built vtable.

/// No-op used for every vtable slot that needs no action (destructors,
/// dispose, lock, unlock): the underlying data is static.
unsafe extern "C" fn one_byte_const_no_op(_this: *const OneByteConst) {}
/// Always reports the resource as cacheable.
unsafe extern "C" fn one_byte_const_is_cacheable(
  _this: *const OneByteConst,
) -> bool {
  true
}
/// Returns the pointer to the static string data.
unsafe extern "C" fn one_byte_const_data(
  this: *const OneByteConst,
) -> *const char {
  // SAFETY: Only called from C++ with a valid OneByteConst pointer.
  unsafe { (*this).cached_data }
}
/// Returns the length in bytes of the static string data.
unsafe extern "C" fn one_byte_const_length(this: *const OneByteConst) -> usize {
  // SAFETY: Only called from C++ with a valid OneByteConst pointer.
  unsafe { (*this).length }
}
/// Nothing to unaccount for static data.
unsafe extern "C" fn one_byte_const_unaccount(
  _this: *const OneByteConst,
  _isolate: *mut RealIsolate,
) {
}
/// Returns the sentinel that stands for "no specific estimate".
unsafe extern "C" fn one_byte_const_estimate_memory_usage(
  _this: *const OneByteConst,
) -> size_t {
  usize::MAX // ExternalStringResource::kDefaultMemoryEstimate
}
/// Records nothing; the recorder argument is ignored.
unsafe extern "C" fn one_byte_const_estimate_shared_memory_usage(
  _this: *const OneByteConst,
  _recorder: *mut (),
) {
}
353
// Function-pointer signatures for the slots of `OneByteConstVtable`. Every
// callback receives the `OneByteConst` itself as its first argument.

// Slot that takes only `this` and returns nothing.
type OneByteConstNoOp = unsafe extern "C" fn(*const OneByteConst);
// Slot answering whether the resource is cacheable.
type OneByteConstIsCacheable =
  unsafe extern "C" fn(*const OneByteConst) -> bool;
// Slot returning the data pointer.
type OneByteConstData =
  unsafe extern "C" fn(*const OneByteConst) -> *const char;
// Slot returning the data length.
type OneByteConstLength = unsafe extern "C" fn(*const OneByteConst) -> usize;
// Slot notified with the isolate when the resource is unaccounted.
type OneByteConstUnaccount =
  unsafe extern "C" fn(*const OneByteConst, *mut RealIsolate);
// Slot returning a memory-usage estimate.
type OneByteConstEstimateMemoryUsage =
  unsafe extern "C" fn(*const OneByteConst) -> size_t;
// Slot receiving an opaque recorder pointer for shared-memory accounting.
type OneByteConstEstimateSharedMemoryUsage =
  unsafe extern "C" fn(*const OneByteConst, *mut ());
366
367#[repr(C)]
368struct OneByteConstVtable {
369  #[cfg(target_family = "windows")]
370  // In SysV / Itanium ABI -0x10 offset of the vtable
371  // tells how many bytes the vtable pointer pointing to
372  // this vtable is offset from the base class. For
373  // single inheritance this is always 0.
374  _offset_to_top: usize,
375  // In Itanium ABI the -0x08 offset contains the type_info
376  // pointer, and in MSVC it contains the RTTI Complete Object
377  // Locator pointer. V8 is normally compiled with `-fno-rtti`
378  // meaning that this pointer is a nullptr on both
379  // Itanium and MSVC.
380  _typeinfo: *const (),
381  // After the metadata fields come the virtual function
382  // pointers. The vtable pointer in a class instance points
383  // to the first virtual function pointer, making this
384  // the 0x00 offset of the table.
385  // The order of the virtual function pointers is determined
386  // by their order of declaration in the classes.
387  delete1: OneByteConstNoOp,
388  // In SysV / Itanium ABI, a class vtable includes the
389  // deleting destructor and the compete object destructor.
390  // In MSVC, it only includes the deleting destructor.
391  #[cfg(not(target_family = "windows"))]
392  delete2: OneByteConstNoOp,
393  is_cacheable: OneByteConstIsCacheable,
394  unaccount: OneByteConstUnaccount,
395  estimate_memory_usage: OneByteConstEstimateMemoryUsage,
396  estimate_shared_memory_usage: OneByteConstEstimateSharedMemoryUsage,
397  dispose: OneByteConstNoOp,
398  lock: OneByteConstNoOp,
399  unlock: OneByteConstNoOp,
400  data: OneByteConstData,
401  length: OneByteConstLength,
402}
403
404const ONE_BYTE_CONST_VTABLE: OneByteConstVtable = OneByteConstVtable {
405  #[cfg(target_family = "windows")]
406  _offset_to_top: 0,
407  _typeinfo: std::ptr::null(),
408  delete1: one_byte_const_no_op,
409  #[cfg(not(target_family = "windows"))]
410  delete2: one_byte_const_no_op,
411  is_cacheable: one_byte_const_is_cacheable,
412  unaccount: one_byte_const_unaccount,
413  estimate_memory_usage: one_byte_const_estimate_memory_usage,
414  estimate_shared_memory_usage: one_byte_const_estimate_shared_memory_usage,
415  dispose: one_byte_const_no_op,
416  lock: one_byte_const_no_op,
417  unlock: one_byte_const_no_op,
418  data: one_byte_const_data,
419  length: one_byte_const_length,
420};
421
/// Selects how a newly created string is allocated.
///
/// Passed by value to the `v8__String__NewFrom*` functions, hence
/// `#[repr(C)]`. `Normal` is the default.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum NewStringType {
  /// Create an ordinary string.
  #[default]
  Normal,
  /// Create an internalized string.
  Internalized,
}
429
bitflags! {
  /// Bit set of options for string write operations.
  ///
  /// The `*_v2` write methods in this file take [`WriteFlags`] instead; this
  /// type is presumably kept for older write APIs — confirm before removing.
  #[derive(Clone, Copy, Default)]
  #[repr(transparent)]
  pub struct WriteOptions: int {
    /// No option set.
    const NO_OPTIONS = 0;
    const HINT_MANY_WRITES_EXPECTED = 1;
    const NO_NULL_TERMINATION = 2;
    const PRESERVE_ONE_BYTE_NULL = 4;
    // Used by WriteUtf8 to replace orphan surrogate code units with the
    // unicode replacement character. Needs to be set to guarantee valid UTF-8
    // output.
    const REPLACE_INVALID_UTF8 = 8;
  }
}
444
bitflags! {
  /// Bit set of flags accepted by the `*_v2` write methods on `String`.
  ///
  /// The bit values are taken from the generated bindings so they stay in
  /// sync with the C++ side; the empty set is the `Default`.
  #[derive(Clone, Copy, Default)]
  #[repr(transparent)]
  pub struct WriteFlags: int {
    /// Request a null terminator after the written characters.
    const kNullTerminate = crate::binding::v8_String_WriteFlags_kNullTerminate as _;
    /// Request replacement of invalid sequences when writing UTF-8.
    const kReplaceInvalidUtf8 = crate::binding::v8_String_WriteFlags_kReplaceInvalidUtf8 as _;
  }
}
453
454impl String {
  /// The maximum length (in bytes) of a buffer that a v8::String can be built
  /// from. Attempting to create a v8::String from a larger buffer will result
  /// in None being returned.
  ///
  /// The value is taken from the generated binding constant
  /// `v8__String__kMaxLength`.
  pub const MAX_LENGTH: usize = v8__String__kMaxLength as _;
459
460  #[inline(always)]
461  pub fn empty<'s>(scope: &PinScope<'s, '_, ()>) -> Local<'s, String> {
462    // FIXME(bnoordhuis) v8__String__Empty() is infallible so there
463    // is no need to box up the result, only to unwrap it again.
464    unsafe { scope.cast_local(|sd| v8__String__Empty(sd.get_isolate_ptr())) }
465      .unwrap()
466  }
467
468  /// Allocates a new string from UTF-8 data. Only returns an empty value when
469  /// length > kMaxLength
470  #[inline(always)]
471  pub fn new_from_utf8<'s>(
472    scope: &PinScope<'s, '_, ()>,
473    buffer: &[u8],
474    new_type: NewStringType,
475  ) -> Option<Local<'s, String>> {
476    if buffer.is_empty() {
477      return Some(Self::empty(scope));
478    }
479    let buffer_len = buffer.len().try_into().ok()?;
480    unsafe {
481      scope.cast_local(|sd| {
482        v8__String__NewFromUtf8(
483          sd.get_isolate_ptr(),
484          buffer.as_ptr() as *const char,
485          new_type,
486          buffer_len,
487        )
488      })
489    }
490  }
491
492  /// Allocates a new string from Latin-1 data.  Only returns an empty value when
493  /// length > kMaxLength.
494  #[inline(always)]
495  pub fn new_from_one_byte<'s>(
496    scope: &PinScope<'s, '_, ()>,
497    buffer: &[u8],
498    new_type: NewStringType,
499  ) -> Option<Local<'s, String>> {
500    let buffer_len = buffer.len().try_into().ok()?;
501    unsafe {
502      scope.cast_local(|sd| {
503        v8__String__NewFromOneByte(
504          sd.get_isolate_ptr(),
505          buffer.as_ptr(),
506          new_type,
507          buffer_len,
508        )
509      })
510    }
511  }
512
513  /// Allocates a new string from UTF-16 data. Only returns an empty value when
514  /// length > kMaxLength.
515  #[inline(always)]
516  pub fn new_from_two_byte<'s>(
517    scope: &PinScope<'s, '_, ()>,
518    buffer: &[u16],
519    new_type: NewStringType,
520  ) -> Option<Local<'s, String>> {
521    let buffer_len = buffer.len().try_into().ok()?;
522    unsafe {
523      scope.cast_local(|sd| {
524        v8__String__NewFromTwoByte(
525          sd.get_isolate_ptr(),
526          buffer.as_ptr(),
527          new_type,
528          buffer_len,
529        )
530      })
531    }
532  }
533
534  /// Returns the number of characters (UTF-16 code units) in this string.
535  #[inline(always)]
536  pub fn length(&self) -> usize {
537    unsafe { v8__String__Length(self) as usize }
538  }
539
540  /// Returns the number of bytes in the UTF-8 encoded representation of this
541  /// string.
542  #[inline(always)]
543  pub fn utf8_length(&self, scope: &Isolate) -> usize {
544    unsafe { v8__String__Utf8Length(self, scope.as_real_ptr()) as usize }
545  }
546
547  /// Writes the contents of the string to an external buffer, as 16-bit
548  /// (UTF-16) character codes.
549  #[inline(always)]
550  pub fn write_v2(
551    &self,
552    scope: &Isolate,
553    offset: u32,
554    buffer: &mut [u16],
555    flags: WriteFlags,
556  ) {
557    unsafe {
558      v8__String__Write_v2(
559        self,
560        scope.as_real_ptr(),
561        offset,
562        self.length().min(buffer.len()) as _,
563        buffer.as_mut_ptr(),
564        flags.bits(),
565      )
566    }
567  }
568
569  /// Writes the contents of the string to an external buffer, as one-byte
570  /// (Latin-1) characters.
571  #[inline(always)]
572  pub fn write_one_byte_v2(
573    &self,
574    scope: &Isolate,
575    offset: u32,
576    buffer: &mut [u8],
577    flags: WriteFlags,
578  ) {
579    unsafe {
580      v8__String__WriteOneByte_v2(
581        self,
582        scope.as_real_ptr(),
583        offset,
584        self.length().min(buffer.len()) as _,
585        buffer.as_mut_ptr(),
586        flags.bits(),
587      )
588    }
589  }
590
591  /// Writes the contents of the string to an external [`MaybeUninit`] buffer, as one-byte
592  /// (Latin-1) characters.
593  #[inline(always)]
594  pub fn write_one_byte_uninit_v2(
595    &self,
596    scope: &Isolate,
597    offset: u32,
598    buffer: &mut [MaybeUninit<u8>],
599    flags: WriteFlags,
600  ) {
601    unsafe {
602      v8__String__WriteOneByte_v2(
603        self,
604        scope.as_real_ptr(),
605        offset,
606        self.length().min(buffer.len()) as _,
607        buffer.as_mut_ptr() as _,
608        flags.bits(),
609      )
610    }
611  }
612
613  /// Writes the contents of the string to an external buffer, as UTF-8.
614  #[inline(always)]
615  pub fn write_utf8_v2(
616    &self,
617    scope: &Isolate,
618    buffer: &mut [u8],
619    flags: WriteFlags,
620    processed_characters_return: Option<&mut usize>,
621  ) -> usize {
622    unsafe {
623      // SAFETY:
624      // We assume that v8 will overwrite the buffer without de-initializing any byte in it.
625      // So the type casting of the buffer is safe.
626
627      let buffer = {
628        let len = buffer.len();
629        let data = buffer.as_mut_ptr().cast();
630        slice::from_raw_parts_mut(data, len)
631      };
632      self.write_utf8_uninit_v2(
633        scope,
634        buffer,
635        flags,
636        processed_characters_return,
637      )
638    }
639  }
640
641  /// Writes the contents of the string to an external [`MaybeUninit`] buffer, as UTF-8.
642  pub fn write_utf8_uninit_v2(
643    &self,
644    scope: &Isolate,
645    buffer: &mut [MaybeUninit<u8>],
646    flags: WriteFlags,
647    processed_characters_return: Option<&mut usize>,
648  ) -> usize {
649    let bytes = unsafe {
650      v8__String__WriteUtf8_v2(
651        self,
652        scope.as_real_ptr(),
653        buffer.as_mut_ptr() as _,
654        buffer.len(),
655        flags.bits(),
656        processed_characters_return
657          .map(|p| p as *mut _)
658          .unwrap_or(std::ptr::null_mut()),
659      )
660    };
661    bytes as usize
662  }
663
664  // Convenience function not present in the original V8 API.
665  #[inline(always)]
666  pub fn new<'s>(
667    scope: &PinScope<'s, '_, ()>,
668    value: &str,
669  ) -> Option<Local<'s, String>> {
670    Self::new_from_utf8(scope, value.as_ref(), NewStringType::Normal)
671  }
672
673  /// Creates a new string by concatenating `left` and `right`.
674  /// Returns `None` if the resulting string would exceed
675  /// `v8::String::kMaxLength`.
676  #[inline(always)]
677  pub fn concat<'s>(
678    scope: &PinScope<'s, '_, ()>,
679    left: Local<String>,
680    right: Local<String>,
681  ) -> Option<Local<'s, String>> {
682    unsafe {
683      scope.cast_local(|sd| {
684        v8__String__Concat(sd.get_isolate_ptr(), &*left, &*right)
685      })
686    }
687  }
688
689  /// Compile-time function to create an external string resource.
690  /// The buffer is checked to contain only ASCII characters.
691  #[inline(always)]
692  pub const fn create_external_onebyte_const(
693    buffer: &'static [u8],
694  ) -> OneByteConst {
695    // Assert that the buffer contains only ASCII, and that the
696    // length is less or equal to (64-bit) v8::String::kMaxLength.
697    assert!(buffer.is_ascii() && buffer.len() <= ((1 << 29) - 24));
698    OneByteConst {
699      vtable: &ONE_BYTE_CONST_VTABLE.delete1,
700      cached_data: buffer.as_ptr() as *const char,
701      length: buffer.len(),
702    }
703  }
704
705  /// Compile-time function to create an external string resource which
706  /// skips the ASCII and length checks.
707  ///
708  /// ## Safety
709  ///
710  /// The passed in buffer must contain only ASCII data. Note that while V8
711  /// allows OneByte string resources to contain Latin-1 data, the OneByteConst
712  /// struct does not allow it.
713  #[inline(always)]
714  pub const unsafe fn create_external_onebyte_const_unchecked(
715    buffer: &'static [u8],
716  ) -> OneByteConst {
717    OneByteConst {
718      vtable: &ONE_BYTE_CONST_VTABLE.delete1,
719      cached_data: buffer.as_ptr() as *const char,
720      length: buffer.len(),
721    }
722  }
723
724  /// Creates a v8::String from a `&'static OneByteConst`
725  /// which is guaranteed to be ASCII.
726  ///
727  /// Note that OneByteConst guarantees ASCII even though V8 would allow
728  /// OneByte string resources to contain Latin-1.
729  #[inline(always)]
730  pub fn new_from_onebyte_const<'s>(
731    scope: &PinScope<'s, '_, ()>,
732    onebyte_const: &'static OneByteConst,
733  ) -> Option<Local<'s, String>> {
734    unsafe {
735      scope.cast_local(|sd| {
736        v8__String__NewExternalOneByteConst(sd.get_isolate_ptr(), onebyte_const)
737      })
738    }
739  }
740
741  /// Creates a v8::String from a `&'static [u8]`,
742  /// must be Latin-1 or ASCII, not UTF-8!
743  #[inline(always)]
744  pub fn new_external_onebyte_static<'s>(
745    scope: &PinScope<'s, '_, ()>,
746    buffer: &'static [u8],
747  ) -> Option<Local<'s, String>> {
748    let buffer_len = buffer.len().try_into().ok()?;
749    unsafe {
750      scope.cast_local(|sd| {
751        v8__String__NewExternalOneByteStatic(
752          sd.get_isolate_ptr(),
753          buffer.as_ptr() as *const char,
754          buffer_len,
755        )
756      })
757    }
758  }
759
760  /// Creates a `v8::String` from owned bytes.
761  /// The bytes must be Latin-1 or ASCII.
762  /// V8 will take ownership of the buffer and free it when the string is garbage collected.
763  #[inline(always)]
764  pub fn new_external_onebyte<'s>(
765    scope: &PinScope<'s, '_, ()>,
766    buffer: Box<[u8]>,
767  ) -> Option<Local<'s, String>> {
768    let buffer_len = buffer.len();
769    unsafe {
770      scope.cast_local(|sd| {
771        v8__String__NewExternalOneByte(
772          sd.get_isolate_ptr(),
773          Box::into_raw(buffer).cast::<char>(),
774          buffer_len,
775          free_rust_external_onebyte,
776        )
777      })
778    }
779  }
780
781  /// Creates a `v8::String` from owned bytes, length, and a custom destructor.
782  /// The bytes must be Latin-1 or ASCII.
783  /// V8 will take ownership of the buffer and free it when the string is garbage collected.
784  ///
785  /// SAFETY: `buffer` must be owned (valid for the lifetime of the string), and
786  /// `destructor` must be a valid function pointer that can free the buffer.
787  /// The destructor will be called with the buffer and length when the string is garbage collected.
788  #[inline(always)]
789  pub unsafe fn new_external_onebyte_raw<'s>(
790    scope: &PinScope<'s, '_, ()>,
791    buffer: *mut char,
792    buffer_len: usize,
793    destructor: unsafe extern "C" fn(*mut char, usize),
794  ) -> Option<Local<'s, String>> {
795    unsafe {
796      scope.cast_local(|sd| {
797        v8__String__NewExternalOneByte(
798          sd.get_isolate_ptr(),
799          buffer,
800          buffer_len,
801          destructor,
802        )
803      })
804    }
805  }
806
807  /// Creates a v8::String from a `&'static [u16]`.
808  #[inline(always)]
809  pub fn new_external_twobyte_static<'s>(
810    scope: &PinScope<'s, '_, ()>,
811    buffer: &'static [u16],
812  ) -> Option<Local<'s, String>> {
813    let buffer_len = buffer.len().try_into().ok()?;
814    unsafe {
815      scope.cast_local(|sd| {
816        v8__String__NewExternalTwoByteStatic(
817          sd.get_isolate_ptr(),
818          buffer.as_ptr(),
819          buffer_len,
820        )
821      })
822    }
823  }
824
825  /// Creates a `v8::String` from owned two-byte (UTF-16) data.
826  /// V8 will take ownership of the buffer and free it when the string
827  /// is garbage collected.
828  #[inline(always)]
829  pub fn new_external_twobyte<'s>(
830    scope: &PinScope<'s, '_, ()>,
831    buffer: Box<[u16]>,
832  ) -> Option<Local<'s, String>> {
833    let buffer_len = buffer.len();
834    unsafe {
835      scope.cast_local(|sd| {
836        v8__String__NewExternalTwoByte(
837          sd.get_isolate_ptr(),
838          Box::into_raw(buffer).cast::<u16>(),
839          buffer_len,
840          free_rust_external_twobyte,
841        )
842      })
843    }
844  }
845
846  /// Creates a `v8::String` from owned two-byte (UTF-16) data, length,
847  /// and a custom destructor.
848  /// V8 will take ownership of the buffer and call the destructor when
849  /// the string is garbage collected.
850  ///
851  /// # Safety
852  ///
853  /// `buffer` must be owned (valid for the lifetime of the string), and
854  /// `destructor` must be a valid function pointer that can free the
855  /// buffer. The destructor will be called with the buffer and length
856  /// when the string is garbage collected.
857  #[inline(always)]
858  pub unsafe fn new_external_twobyte_raw<'s>(
859    scope: &PinScope<'s, '_, ()>,
860    buffer: *mut u16,
861    buffer_len: usize,
862    destructor: unsafe extern "C" fn(*mut u16, usize),
863  ) -> Option<Local<'s, String>> {
864    unsafe {
865      scope.cast_local(|sd| {
866        v8__String__NewExternalTwoByte(
867          sd.get_isolate_ptr(),
868          buffer,
869          buffer_len,
870          destructor,
871        )
872      })
873    }
874  }
875
876  /// Get the ExternalStringResource for an external string.
877  ///
878  /// Returns None if is_external() doesn't return true.
879  #[inline]
880  pub fn get_external_string_resource(
881    &self,
882  ) -> Option<NonNull<ExternalStringResource>> {
883    NonNull::new(unsafe { v8__String__GetExternalStringResource(self) })
884  }
885
886  /// Get the ExternalOneByteStringResource for an external one-byte string.
887  ///
888  /// Returns None if is_external_onebyte() doesn't return true.
889  #[inline]
890  pub fn get_external_onebyte_string_resource(
891    &self,
892  ) -> Option<NonNull<ExternalOneByteStringResource>> {
893    let (base, encoding) = self.get_external_string_resource_base();
894    let base = base?;
895    if encoding != Encoding::OneByte {
896      return None;
897    }
898
899    Some(base.cast())
900  }
901
902  /// Get the ExternalStringResourceBase for an external string.
903  /// Note this is just the base class, and isn't very useful on its own.
904  /// You'll want to downcast to one of its subclasses, for instance
905  /// with `get_external_onebyte_string_resource`.
906  pub fn get_external_string_resource_base(
907    &self,
908  ) -> (Option<NonNull<ExternalStringResourceBase>>, Encoding) {
909    let mut encoding = Encoding::Unknown;
910    (
911      NonNull::new(unsafe {
912        v8__String__GetExternalStringResourceBase(self, &mut encoding)
913      }),
914      encoding,
915    )
916  }
917
918  /// True if string is external
919  #[inline(always)]
920  pub fn is_external(&self) -> bool {
921    // TODO: re-enable on next v8-release
922    // Right now it fallbacks to Value::IsExternal, which is incorrect
923    // See: https://source.chromium.org/chromium/_/chromium/v8/v8.git/+/1dd8624b524d14076160c1743f7da0b20fbe68e0
924    // unsafe { v8__String__IsExternal(self) }
925
926    // Fallback for now (though functionally identical)
927    self.is_external_onebyte() || self.is_external_twobyte()
928  }
929
930  /// True if string is external & one-byte
931  /// (e.g: created with new_external_onebyte_static)
932  #[inline(always)]
933  pub fn is_external_onebyte(&self) -> bool {
934    unsafe { v8__String__IsExternalOneByte(self) }
935  }
936
937  /// True if string is external & two-byte
938  /// (e.g: created with new_external_twobyte_static)
939  #[inline(always)]
940  pub fn is_external_twobyte(&self) -> bool {
941    unsafe { v8__String__IsExternalTwoByte(self) }
942  }
943
944  /// Will return true if and only if string is known for certain to contain only one-byte data,
945  /// ie: Latin-1, a.k.a. ISO-8859-1 code points. Doesn't read the string so can return false
946  /// negatives, and a return value of false does not mean this string is not one-byte data.
947  ///
948  /// For a method that will not return false negatives at the cost of
949  /// potentially reading the entire string, use [`contains_only_onebyte()`].
950  ///
951  /// [`contains_only_onebyte()`]: String::contains_only_onebyte
952  #[inline(always)]
953  pub fn is_onebyte(&self) -> bool {
954    unsafe { v8__String__IsOneByte(self) }
955  }
956
957  /// True if the string contains only one-byte data.
958  /// Will read the entire string in some cases.
959  #[inline(always)]
960  pub fn contains_only_onebyte(&self) -> bool {
961    unsafe { v8__String__ContainsOnlyOneByte(self) }
962  }
963
964  /// Creates a copy of a [`crate::String`] in a [`std::string::String`].
965  /// Convenience function not present in the original V8 API.
966  ///
967  /// Uses [`ValueView`] internally for single-pass access to the string
968  /// data. When the `simdutf` feature is enabled, uses SIMD-accelerated
969  /// transcoding for Latin-1 and two-byte strings.
970  pub fn to_rust_string_lossy(&self, scope: &Isolate) -> std::string::String {
971    if self.length() == 0 {
972      return std::string::String::new();
973    }
974
975    // SAFETY: `self` is a valid V8 string reachable from a handle scope.
976    let view = unsafe { ValueView::new_from_ref(scope, self) };
977
978    match view.data() {
979      ValueViewData::OneByte(bytes) => {
980        if bytes.is_ascii() {
981          // SAFETY: ASCII is valid UTF-8.
982          unsafe { std::str::from_utf8_unchecked(bytes) }.to_owned()
983        } else {
984          latin1_to_string(bytes)
985        }
986      }
987      ValueViewData::TwoByte(units) => wtf16_to_string(units),
988    }
989  }
990
991  /// Writes the UTF-8 representation of this string into an existing
992  /// [`std::string::String`], reusing its allocation.
993  ///
994  /// The buffer is cleared first, then filled with the string's UTF-8
995  /// contents. This avoids repeated heap allocation when converting
996  /// many V8 strings — callers can keep a single `String` and reuse it.
997  ///
998  /// Uses [`ValueView`] internally for single-pass access, avoiding
999  /// the extra `utf8_length` FFI call.
1000  pub fn write_utf8_into(
1001    &self,
1002    scope: &mut Isolate,
1003    buf: &mut std::string::String,
1004  ) {
1005    buf.clear();
1006    let len = self.length();
1007    if len == 0 {
1008      return;
1009    }
1010
1011    // SAFETY: `self` is a valid V8 string reachable from a handle scope.
1012    // The ValueView is dropped before we return.
1013    let view = unsafe { ValueView::new_from_ref(scope, self) };
1014
1015    match view.data() {
1016      ValueViewData::OneByte(bytes) => {
1017        if bytes.is_ascii() {
1018          // ASCII: direct copy, already valid UTF-8.
1019          buf.reserve(bytes.len());
1020          unsafe {
1021            let vec = buf.as_mut_vec();
1022            std::ptr::copy_nonoverlapping(
1023              bytes.as_ptr(),
1024              vec.as_mut_ptr(),
1025              bytes.len(),
1026            );
1027            vec.set_len(bytes.len());
1028          }
1029        } else {
1030          // Latin-1: each byte can expand to at most 2 UTF-8 bytes.
1031          let max_utf8_len = bytes.len() * 2;
1032          buf.reserve(max_utf8_len);
1033          unsafe {
1034            let vec = buf.as_mut_vec();
1035            let written =
1036              latin1_to_utf8(bytes.len(), bytes.as_ptr(), vec.as_mut_ptr());
1037            vec.set_len(written);
1038          }
1039        }
1040      }
1041      ValueViewData::TwoByte(units) => {
1042        wtf16_into_string(units, buf);
1043      }
1044    }
1045  }
1046
1047  /// Converts a [`crate::String`] to either an owned [`std::string::String`],
1048  /// or a borrowed [`str`], depending on whether it fits into the provided
1049  /// buffer.
1050  ///
1051  /// Uses [`ValueView`] internally for direct access to the string's
1052  /// contents, eliminating the `utf8_length` pre-scan that the previous
1053  /// implementation required.
1054  pub fn to_rust_cow_lossy<'a, const N: usize>(
1055    &self,
1056    scope: &mut Isolate,
1057    buffer: &'a mut [MaybeUninit<u8>; N],
1058  ) -> Cow<'a, str> {
1059    let len = self.length();
1060    if len == 0 {
1061      return "".into();
1062    }
1063
1064    // SAFETY: `self` is a valid V8 string reachable from a handle scope.
1065    // The ValueView is dropped before we return, so the
1066    // DisallowGarbageCollection scope it holds is properly scoped.
1067    let view = unsafe { ValueView::new_from_ref(scope, self) };
1068
1069    match view.data() {
1070      ValueViewData::OneByte(bytes) => {
1071        if bytes.is_ascii() {
1072          // ASCII: direct memcpy, no transcoding needed.
1073          if bytes.len() <= N {
1074            unsafe {
1075              std::ptr::copy_nonoverlapping(
1076                bytes.as_ptr(),
1077                buffer.as_mut_ptr() as *mut u8,
1078                bytes.len(),
1079              );
1080              let buf = &mut buffer[..bytes.len()];
1081              let buf = &mut *(buf as *mut [_] as *mut [u8]);
1082              Cow::Borrowed(std::str::from_utf8_unchecked(buf))
1083            }
1084          } else {
1085            // SAFETY: ASCII bytes are valid UTF-8.
1086            unsafe {
1087              Cow::Owned(std::string::String::from_utf8_unchecked(
1088                bytes.to_vec(),
1089              ))
1090            }
1091          }
1092        } else {
1093          latin1_to_cow_str(bytes, buffer)
1094        }
1095      }
1096      ValueViewData::TwoByte(units) => wtf16_to_cow_str(units, buffer),
1097    }
1098  }
1099}
1100
/// Frees a one-byte buffer by rebuilding the boxed slice `(s, len)` and
/// dropping it.
///
/// # Safety
///
/// `s` and `len` must describe a boxed slice (`Box<[char]>`) that was
/// previously leaked into a raw pointer, and the buffer must not be used
/// or freed again afterwards.
#[inline]
pub unsafe extern "C" fn free_rust_external_onebyte(s: *mut char, len: usize) {
  // SAFETY: the caller guarantees `(s, len)` is a leaked boxed slice, so
  // reconstructing the `Box` and dropping it releases the allocation once.
  unsafe {
    let raw_slice = std::ptr::slice_from_raw_parts_mut(s, len);
    drop(Box::from_raw(raw_slice));
  }
}
1110
/// Frees a two-byte buffer by rebuilding the boxed slice `(s, len)` and
/// dropping it.
///
/// # Safety
///
/// `s` and `len` must describe a boxed slice (`Box<[u16]>`) that was
/// previously leaked into a raw pointer, and the buffer must not be used
/// or freed again afterwards.
#[inline]
pub unsafe extern "C" fn free_rust_external_twobyte(s: *mut u16, len: usize) {
  // SAFETY: the caller guarantees `(s, len)` is a leaked boxed slice, so
  // reconstructing the `Box` and dropping it releases the allocation once.
  unsafe {
    let raw_slice = std::ptr::slice_from_raw_parts_mut(s, len);
    drop(Box::from_raw(raw_slice));
  }
}
1118
/// The contents of a string as exposed by [`ValueView::data`], in whichever
/// of the two encodings the view reports.
#[derive(Debug, PartialEq)]
pub enum ValueViewData<'s> {
  /// One byte per code unit. The conversion helpers in this file treat
  /// these bytes as Latin-1 (pure ASCII being the zero-copy special case).
  OneByte(&'s [u8]),
  /// Two bytes per code unit. The conversion helpers in this file treat
  /// these as potentially ill-formed UTF-16 (WTF-16).
  TwoByte(&'s [u16]),
}
1124
/// Returns a view onto a string's contents.
///
/// WARNING: This does not copy the string's contents, and will therefore be
/// invalidated if the GC can move the string while the ValueView is alive. It
/// is therefore required that no GC or allocation can happen while there is an
/// active ValueView. This requirement may be relaxed in the future.
///
/// V8 strings are either encoded as one-byte or two-bytes per character.
#[repr(C)]
pub struct ValueView<'s>(
  // Opaque storage for the underlying view object; its size comes from the
  // `v8__String__ValueView_SIZE` binding constant.
  [u8; crate::binding::v8__String__ValueView_SIZE],
  // Ties the view to the `'s` lifetime of the string it was created from.
  PhantomData<&'s ()>,
);
1138
1139impl<'s> ValueView<'s> {
1140  #[inline(always)]
1141  pub fn new(isolate: &mut Isolate, string: Local<'s, String>) -> Self {
1142    // SAFETY: Local<'s, String> guarantees the V8 string is rooted in a
1143    // HandleScope that lives for at least 's.  Deref on Local erases the
1144    // scope lifetime, so we recover it via pointer cast.
1145    let string_ref: &'s String = unsafe { &*((&*string) as *const String) };
1146    unsafe { Self::new_from_ref(isolate, string_ref) }
1147  }
1148
1149  /// Constructs a `ValueView` from a raw string reference.
1150  ///
1151  /// # Safety
1152  ///
1153  /// The caller must ensure that `string` is a valid V8 string that
1154  /// remains alive for at least `'s`. In practice this means the
1155  /// string must be reachable from a handle scope that outlives the
1156  /// returned `ValueView`.
1157  #[inline(always)]
1158  pub(crate) unsafe fn new_from_ref(
1159    isolate: &Isolate,
1160    string: &'s String,
1161  ) -> Self {
1162    let mut v = std::mem::MaybeUninit::uninit();
1163    unsafe {
1164      v8__String__ValueView__CONSTRUCT(
1165        v.as_mut_ptr(),
1166        isolate.as_real_ptr(),
1167        string,
1168      );
1169      v.assume_init()
1170    }
1171  }
1172
1173  #[inline(always)]
1174  pub fn data(&self) -> ValueViewData<'_> {
1175    unsafe {
1176      let data = v8__String__ValueView__data(self);
1177      let length = v8__String__ValueView__length(self) as usize;
1178      if v8__String__ValueView__is_one_byte(self) {
1179        ValueViewData::OneByte(std::slice::from_raw_parts(data as _, length))
1180      } else {
1181        ValueViewData::TwoByte(std::slice::from_raw_parts(data as _, length))
1182      }
1183    }
1184  }
1185
1186  /// Returns a zero-copy `&str` if the string is one-byte and pure ASCII.
1187  ///
1188  /// This is the fastest way to access a V8 string's contents as a Rust
1189  /// `&str` — no allocation, no copy, no transcoding. Returns `None` for
1190  /// strings that contain non-ASCII Latin-1 bytes or are two-byte encoded.
1191  ///
1192  /// The returned reference is valid as long as this `ValueView` is alive.
1193  #[inline(always)]
1194  pub fn as_str(&self) -> Option<&str> {
1195    match self.data() {
1196      ValueViewData::OneByte(bytes) => {
1197        if bytes.is_ascii() {
1198          // SAFETY: ASCII bytes are valid UTF-8.
1199          Some(unsafe { std::str::from_utf8_unchecked(bytes) })
1200        } else {
1201          None
1202        }
1203      }
1204      ValueViewData::TwoByte(_) => None,
1205    }
1206  }
1207
1208  /// Returns the string contents as a `Cow<str>`.
1209  ///
1210  /// - **One-byte ASCII**: returns `Cow::Borrowed(&str)` — true zero-copy.
1211  /// - **One-byte Latin-1** (non-ASCII): transcodes to UTF-8, returns
1212  ///   `Cow::Owned`.
1213  /// - **Two-byte** (UTF-16/WTF-16): transcodes to UTF-8, returns
1214  ///   `Cow::Owned`. When the `simdutf` feature is enabled, uses
1215  ///   SIMD-accelerated conversion for valid UTF-16 strings above a
1216  ///   threshold size.
1217  #[inline(always)]
1218  pub fn to_cow_lossy(&self) -> Cow<'_, str> {
1219    match self.data() {
1220      ValueViewData::OneByte(bytes) => {
1221        if bytes.is_ascii() {
1222          // SAFETY: ASCII bytes are valid UTF-8.
1223          Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(bytes) })
1224        } else {
1225          Cow::Owned(latin1_to_string(bytes))
1226        }
1227      }
1228      ValueViewData::TwoByte(units) => Cow::Owned(wtf16_to_string(units)),
1229    }
1230  }
1231}
1232
1233// ---------------------------------------------------------------------------
1234// String conversion helpers.
1235// When the `simdutf` feature is enabled, hot paths dispatch to
1236// SIMD-accelerated routines in `crate::simdutf`.
1237// ---------------------------------------------------------------------------
1238
/// The minimum number of UTF-16 code units before we try the SIMD path.
/// Below this threshold the overhead of validation + length pre-scan is
/// not worth it, so we fall back to the scalar loop.
///
/// Only defined when the `simdutf` feature is enabled.
#[cfg(feature = "simdutf")]
const WTF16_SIMD_THRESHOLD: usize = 96;
1244
1245/// Converts Latin-1 bytes to an owned [`std::string::String`].
1246#[inline(always)]
1247fn latin1_to_string(bytes: &[u8]) -> std::string::String {
1248  debug_assert!(!bytes.is_ascii());
1249  #[cfg(feature = "simdutf")]
1250  {
1251    let utf8_len = crate::simdutf::utf8_length_from_latin1(bytes);
1252    let mut buf: Vec<u8> = Vec::with_capacity(utf8_len);
1253    unsafe {
1254      let out = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), utf8_len);
1255      let written = crate::simdutf::convert_latin1_to_utf8(bytes, out);
1256      debug_assert_eq!(written, utf8_len);
1257      buf.set_len(written);
1258      std::string::String::from_utf8_unchecked(buf)
1259    }
1260  }
1261  #[cfg(not(feature = "simdutf"))]
1262  {
1263    let max_utf8_len = bytes.len() * 2;
1264    let mut buf: Vec<u8> = Vec::with_capacity(max_utf8_len);
1265    unsafe {
1266      let written =
1267        latin1_to_utf8(bytes.len(), bytes.as_ptr(), buf.as_mut_ptr());
1268      buf.set_len(written);
1269      std::string::String::from_utf8_unchecked(buf)
1270    }
1271  }
1272}
1273
/// Transcodes (potentially ill-formed) UTF-16LE / WTF-16 code units into a
/// newly allocated [`std::string::String`]; every unpaired surrogate
/// becomes U+FFFD.
#[inline(always)]
fn wtf16_to_string(units: &[u16]) -> std::string::String {
  #[cfg(feature = "simdutf")]
  {
    // Long inputs that validate as UTF-16 take simdutf's SIMD-accelerated
    // path with an exact-size allocation.
    if units.len() >= WTF16_SIMD_THRESHOLD
      && crate::simdutf::validate_utf16le(units)
    {
      let exact_len = crate::simdutf::utf8_length_from_utf16le(units);
      let mut utf8: Vec<u8> = Vec::with_capacity(exact_len);
      unsafe {
        let dst = std::slice::from_raw_parts_mut(utf8.as_mut_ptr(), exact_len);
        let produced = crate::simdutf::convert_utf16le_to_utf8(units, dst);
        debug_assert_eq!(produced, exact_len);
        utf8.set_len(produced);
        return std::string::String::from_utf8_unchecked(utf8);
      }
    }
  }
  // Scalar fallback for short strings and strings with unpaired
  // surrogates (WTF-16). A single code unit needs at most 3 UTF-8 bytes.
  let mut out = std::string::String::with_capacity(units.len() * 3);
  out.extend(
    std::char::decode_utf16(units.iter().copied())
      .map(|decoded| decoded.unwrap_or('\u{FFFD}')),
  );
  out
}
1303
/// Appends WTF-16 code units as UTF-8 onto the end of an existing string
/// buffer, replacing unpaired surrogates with U+FFFD.
///
/// Whatever `buf` already contains is preserved on every code path.
#[inline(always)]
fn wtf16_into_string(units: &[u16], buf: &mut std::string::String) {
  #[cfg(feature = "simdutf")]
  {
    if units.len() >= WTF16_SIMD_THRESHOLD
      && crate::simdutf::validate_utf16le(units)
    {
      let utf8_len = crate::simdutf::utf8_length_from_utf16le(units);
      // `reserve` guarantees room for `utf8_len` bytes past the current
      // length.
      buf.reserve(utf8_len);
      unsafe {
        let vec = buf.as_mut_vec();
        // Write after the existing contents so this path appends, like the
        // scalar fallback below, instead of overwriting from offset 0.
        let start = vec.len();
        let out = std::slice::from_raw_parts_mut(
          vec.as_mut_ptr().add(start),
          utf8_len,
        );
        let written = crate::simdutf::convert_utf16le_to_utf8(units, out);
        debug_assert_eq!(written, utf8_len);
        vec.set_len(start + written);
      }
      return;
    }
  }
  // Scalar fallback: a single code unit needs at most 3 UTF-8 bytes.
  buf.reserve(units.len() * 3);
  for result in std::char::decode_utf16(units.iter().copied()) {
    buf.push(result.unwrap_or('\u{FFFD}'));
  }
}
1330
1331/// Converts Latin-1 bytes to a `Cow<str>`, borrowing from `buffer` when
1332/// the transcoded result fits.
1333#[inline(always)]
1334fn latin1_to_cow_str<'a, const N: usize>(
1335  bytes: &[u8],
1336  buffer: &'a mut [MaybeUninit<u8>; N],
1337) -> Cow<'a, str> {
1338  #[cfg(feature = "simdutf")]
1339  let utf8_len = crate::simdutf::utf8_length_from_latin1(bytes);
1340  #[cfg(not(feature = "simdutf"))]
1341  let utf8_len = bytes.len() * 2; // conservative upper bound
1342
1343  if utf8_len <= N {
1344    #[cfg(feature = "simdutf")]
1345    let written = unsafe {
1346      let out = std::slice::from_raw_parts_mut(
1347        buffer.as_mut_ptr() as *mut u8,
1348        utf8_len,
1349      );
1350      crate::simdutf::convert_latin1_to_utf8(bytes, out)
1351    };
1352    #[cfg(not(feature = "simdutf"))]
1353    let written = unsafe {
1354      latin1_to_utf8(
1355        bytes.len(),
1356        bytes.as_ptr(),
1357        buffer.as_mut_ptr() as *mut u8,
1358      )
1359    };
1360
1361    unsafe {
1362      let buf = &mut buffer[..written];
1363      let buf = &mut *(buf as *mut [_] as *mut [u8]);
1364      Cow::Borrowed(std::str::from_utf8_unchecked(buf))
1365    }
1366  } else {
1367    Cow::Owned(latin1_to_string(bytes))
1368  }
1369}
1370
/// Transcodes WTF-16 code units to UTF-8, borrowing from `buffer` when the
/// output fits and allocating an owned string otherwise. Unpaired
/// surrogates become U+FFFD.
#[inline(always)]
fn wtf16_to_cow_str<'a, const N: usize>(
  units: &[u16],
  buffer: &'a mut [MaybeUninit<u8>; N],
) -> Cow<'a, str> {
  #[cfg(feature = "simdutf")]
  {
    if units.len() >= WTF16_SIMD_THRESHOLD
      && crate::simdutf::validate_utf16le(units)
    {
      let needed = crate::simdutf::utf8_length_from_utf16le(units);

      // Doesn't fit in the stack buffer — allocate.
      if needed > N {
        return Cow::Owned(wtf16_to_string(units));
      }

      let produced = unsafe {
        let dst = std::slice::from_raw_parts_mut(
          buffer.as_mut_ptr() as *mut u8,
          needed,
        );
        crate::simdutf::convert_utf16le_to_utf8(units, dst)
      };
      return unsafe {
        let filled = &mut buffer[..produced];
        let filled = &mut *(filled as *mut [_] as *mut [u8]);
        Cow::Borrowed(std::str::from_utf8_unchecked(filled))
      };
    }
  }

  // Scalar fallback: encode one character at a time into the buffer and
  // give up (allocating instead) as soon as the next one would overflow.
  let dst = buffer.as_mut_ptr() as *mut u8;
  let mut used = 0;
  let mut scratch = [0u8; 4];
  for decoded in std::char::decode_utf16(units.iter().copied()) {
    let ch = decoded.unwrap_or('\u{FFFD}');
    let utf8 = ch.encode_utf8(&mut scratch).as_bytes();
    let next = used + utf8.len();
    if next > N {
      return Cow::Owned(std::string::String::from_utf16_lossy(units));
    }
    // SAFETY: `next <= N`, so the write stays inside `buffer`.
    unsafe {
      std::ptr::copy_nonoverlapping(utf8.as_ptr(), dst.add(used), utf8.len());
    }
    used = next;
  }

  // SAFETY: the first `used` bytes of `buffer` were initialized above with
  // valid UTF-8.
  unsafe {
    let filled = &mut buffer[..used];
    let filled = &mut *(filled as *mut [_] as *mut [u8]);
    Cow::Borrowed(std::str::from_utf8_unchecked(filled))
  }
}
1435
/// Runs the binding's destructor for the view when it goes out of scope.
impl Drop for ValueView<'_> {
  fn drop(&mut self) {
    // Counterpart of the `v8__String__ValueView__CONSTRUCT` call made in
    // `ValueView::new_from_ref`.
    unsafe { v8__String__ValueView__DESTRUCT(self) }
  }
}
v8/string.rs

v8/
string.rs