v8/
string.rs

1use crate::Isolate;
2use crate::Local;
3use crate::String;
4use crate::binding::v8__String__kMaxLength;
5use crate::isolate::RealIsolate;
6use crate::scope::PinScope;
7use crate::support::Opaque;
8use crate::support::char;
9use crate::support::int;
10use crate::support::size_t;
11use std::borrow::Cow;
12use std::convert::TryInto;
13use std::default::Default;
14use std::ffi::c_void;
15use std::marker::PhantomData;
16use std::mem::MaybeUninit;
17use std::ptr::NonNull;
18use std::slice;
19
/// Transcodes a Latin-1 byte sequence into UTF-8.
///
/// Bytes below `0x80` are copied unchanged; every other byte becomes a
/// two-byte UTF-8 sequence, so the output can be up to twice as long as the
/// input.
///
/// Returns the number of bytes stored in `outbuf`.
///
/// # Safety
///
/// - `inbuf` must be valid for reads of `input_length` bytes.
/// - `outbuf` must be valid for writes of `2 * input_length` bytes.
#[inline(always)]
pub unsafe fn latin1_to_utf8(
  input_length: usize,
  inbuf: *const u8,
  outbuf: *mut u8,
) -> usize {
  /// Number of input bytes examined per bulk step.
  const CHUNK: usize = 8;
  /// Mask selecting the top bit of each byte in a `u64`.
  const HIGH_BITS: u64 = 0x8080_8080_8080_8080;

  /// Encodes one Latin-1 byte at `dst` and returns how many bytes it took.
  ///
  /// Caller guarantees `dst` is valid for two bytes of writes.
  #[inline(always)]
  unsafe fn encode_one(byte: u8, dst: *mut u8) -> usize {
    unsafe {
      if byte < 0x80 {
        dst.write(byte);
        1
      } else {
        // Latin-1 byte to two-byte UTF-8 sequence.
        dst.write(0b1100_0000 | (byte >> 6));
        dst.add(1).write(0b1000_0000 | (byte & 0b0011_1111));
        2
      }
    }
  }

  let mut read = 0;
  let mut written = 0;

  // SAFETY: `read` never exceeds `input_length` and `written` never exceeds
  // `2 * read` (plus the chunk being stored), both of which the caller
  // guarantees to be in bounds.
  unsafe {
    // Bulk phase: one AND over eight bytes tells whether all are ASCII.
    while read + CHUNK <= input_length {
      let word = inbuf.add(read).cast::<u64>().read_unaligned();
      if word & HIGH_BITS == 0 {
        outbuf.add(written).cast::<u64>().write_unaligned(word);
        read += CHUNK;
        written += CHUNK;
      } else {
        // Some byte in this chunk is non-ASCII: go through it one by one.
        for _ in 0..CHUNK {
          written += encode_one(inbuf.add(read).read(), outbuf.add(written));
          read += 1;
        }
      }
    }

    // Tail phase: fewer than eight bytes remain.
    while read < input_length {
      written += encode_one(inbuf.add(read).read(), outbuf.add(written));
      read += 1;
    }
  }

  written
}
84
85unsafe extern "C" {
86  fn v8__String__Empty(isolate: *mut RealIsolate) -> *const String;
87
88  fn v8__String__NewFromUtf8(
89    isolate: *mut RealIsolate,
90    data: *const char,
91    new_type: NewStringType,
92    length: int,
93  ) -> *const String;
94
95  fn v8__String__NewFromOneByte(
96    isolate: *mut RealIsolate,
97    data: *const u8,
98    new_type: NewStringType,
99    length: int,
100  ) -> *const String;
101
102  fn v8__String__NewFromTwoByte(
103    isolate: *mut RealIsolate,
104    data: *const u16,
105    new_type: NewStringType,
106    length: int,
107  ) -> *const String;
108
109  fn v8__String__Length(this: *const String) -> int;
110
111  fn v8__String__Utf8Length(
112    this: *const String,
113    isolate: *mut RealIsolate,
114  ) -> int;
115
116  fn v8__String__Write_v2(
117    this: *const String,
118    isolate: *mut RealIsolate,
119    offset: u32,
120    length: u32,
121    buffer: *mut u16,
122    flags: int,
123  );
124
125  fn v8__String__WriteOneByte_v2(
126    this: *const String,
127    isolate: *mut RealIsolate,
128    offset: u32,
129    length: u32,
130    buffer: *mut u8,
131    flags: int,
132  );
133
134  fn v8__String__WriteUtf8_v2(
135    this: *const String,
136    isolate: *mut RealIsolate,
137    buffer: *mut char,
138    capacity: size_t,
139    flags: int,
140    processed_characters_return: *mut size_t,
141  ) -> int;
142
143  fn v8__String__GetExternalStringResource(
144    this: *const String,
145  ) -> *mut ExternalStringResource;
146  fn v8__String__GetExternalStringResourceBase(
147    this: *const String,
148    encoding: *mut Encoding,
149  ) -> *mut ExternalStringResourceBase;
150
151  fn v8__String__NewExternalOneByteConst(
152    isolate: *mut RealIsolate,
153    onebyte_const: *const OneByteConst,
154  ) -> *const String;
155
156  fn v8__String__NewExternalOneByteStatic(
157    isolate: *mut RealIsolate,
158    buffer: *const char,
159    length: int,
160  ) -> *const String;
161
162  fn v8__String__NewExternalOneByte(
163    isolate: *mut RealIsolate,
164    buffer: *mut char,
165    length: size_t,
166    free: unsafe extern "C" fn(*mut char, size_t),
167  ) -> *const String;
168
169  fn v8__String__NewExternalTwoByteStatic(
170    isolate: *mut RealIsolate,
171    buffer: *const u16,
172    length: int,
173  ) -> *const String;
174
175  #[allow(dead_code)]
176  fn v8__String__IsExternal(this: *const String) -> bool;
177  fn v8__String__IsExternalOneByte(this: *const String) -> bool;
178  fn v8__String__IsExternalTwoByte(this: *const String) -> bool;
179  #[allow(dead_code)]
180  fn v8__String__IsOneByte(this: *const String) -> bool;
181  fn v8__String__ContainsOnlyOneByte(this: *const String) -> bool;
182  fn v8__ExternalOneByteStringResource__data(
183    this: *const ExternalOneByteStringResource,
184  ) -> *const char;
185  fn v8__ExternalOneByteStringResource__length(
186    this: *const ExternalOneByteStringResource,
187  ) -> size_t;
188
189  fn v8__String__ValueView__CONSTRUCT(
190    buf: *mut ValueView,
191    isolate: *mut RealIsolate,
192    string: *const String,
193  );
194  fn v8__String__ValueView__DESTRUCT(this: *mut ValueView);
195  fn v8__String__ValueView__is_one_byte(this: *const ValueView) -> bool;
196  fn v8__String__ValueView__data(this: *const ValueView) -> *const c_void;
197  fn v8__String__ValueView__length(this: *const ValueView) -> int;
198}
199
/// Encoding tag reported alongside an external string resource.
///
/// The value is written through a `*mut Encoding` out-parameter by
/// `v8__String__GetExternalStringResourceBase`, so the discriminants must stay
/// in sync with the C++ side (presumably `v8::String::Encoding` — confirm
/// against the V8 headers when bumping V8).
///
/// The type is a plain tag, so it is `Copy` and fully comparable.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(C)]
pub enum Encoding {
  /// No encoding information available; also the initial value handed to V8.
  Unknown = 0x1,
  /// The resource holds two-byte data.
  TwoByte = 0x2,
  /// The resource holds one-byte data.
  OneByte = 0x8,
}
207
208#[repr(C)]
209pub struct ExternalStringResource(Opaque);
210
211#[repr(C)]
212pub struct ExternalStringResourceBase(Opaque);
213
214#[repr(C)]
215/// An external, one-byte string resource.
216/// This corresponds with `v8::String::ExternalOneByteStringResource`.
217///
218/// Note: The data contained in a one-byte string resource is guaranteed to be
219/// Latin-1 data. It is not safe to assume that it is valid UTF-8, as Latin-1
220/// only has commonality with UTF-8 in the ASCII range and differs beyond that.
221pub struct ExternalOneByteStringResource(Opaque);
222
223impl ExternalOneByteStringResource {
224  /// Returns a pointer to the data owned by this resource.
225  /// This pointer is valid as long as the resource is alive.
226  /// The data is guaranteed to be Latin-1.
227  #[inline]
228  pub fn data(&self) -> *const char {
229    unsafe { v8__ExternalOneByteStringResource__data(self) }
230  }
231
232  /// Returns the length of the data owned by this resource.
233  #[inline]
234  pub fn length(&self) -> usize {
235    unsafe { v8__ExternalOneByteStringResource__length(self) }
236  }
237
238  /// Returns the data owned by this resource as a string slice.
239  /// The data is guaranteed to be Latin-1.
240  #[inline]
241  pub fn as_bytes(&self) -> &[u8] {
242    let len = self.length();
243    if len == 0 {
244      &[]
245    } else {
246      // SAFETY: We know this is Latin-1
247      unsafe { std::slice::from_raw_parts(self.data().cast(), len) }
248    }
249  }
250}
251
252/// A static ASCII string resource for usage in V8, created at build time.
253#[repr(C)]
254#[derive(Copy, Clone, Debug)]
255pub struct OneByteConst {
256  vtable: *const OneByteConstNoOp,
257  cached_data: *const char,
258  length: usize,
259}
260
261impl OneByteConst {
262  /// `const` function that returns this string as a string reference.
263  #[inline(always)]
264  pub const fn as_str(&self) -> &str {
265    if self.length == 0 {
266      ""
267    } else {
268      // SAFETY: We know this is ASCII and length > 0
269      unsafe {
270        std::str::from_utf8_unchecked(std::slice::from_raw_parts(
271          self.cached_data as _,
272          self.length,
273        ))
274      }
275    }
276  }
277}
278
279impl AsRef<str> for OneByteConst {
280  #[inline(always)]
281  fn as_ref(&self) -> &str {
282    self.as_str()
283  }
284}
285
286impl AsRef<[u8]> for OneByteConst {
287  #[inline(always)]
288  fn as_ref(&self) -> &[u8] {
289    self.as_str().as_bytes()
290  }
291}
292
293impl std::ops::Deref for OneByteConst {
294  type Target = str;
295  #[inline(always)]
296  fn deref(&self) -> &Self::Target {
297    self.as_ref()
298  }
299}
300
301// SAFETY: The vtable for OneByteConst is an immutable static and all
302// of the included functions are thread-safe, the cached_data pointer
303// is never changed and points to a static ASCII string, and the
304// length is likewise never changed. Thus, it is safe to share the
305// OneByteConst across threads. This means that multiple isolates
306// can use the same OneByteConst statics simultaneously.
307unsafe impl Sync for OneByteConst {}
308
309unsafe extern "C" fn one_byte_const_no_op(_this: *const OneByteConst) {}
310unsafe extern "C" fn one_byte_const_is_cacheable(
311  _this: *const OneByteConst,
312) -> bool {
313  true
314}
315unsafe extern "C" fn one_byte_const_data(
316  this: *const OneByteConst,
317) -> *const char {
318  // SAFETY: Only called from C++ with a valid OneByteConst pointer.
319  unsafe { (*this).cached_data }
320}
321unsafe extern "C" fn one_byte_const_length(this: *const OneByteConst) -> usize {
322  // SAFETY: Only called from C++ with a valid OneByteConst pointer.
323  unsafe { (*this).length }
324}
325unsafe extern "C" fn one_byte_const_unaccount(
326  _this: *const OneByteConst,
327  _isolate: *mut RealIsolate,
328) {
329}
330unsafe extern "C" fn one_byte_const_estimate_memory_usage(
331  _this: *const OneByteConst,
332) -> size_t {
333  usize::MAX // ExternalStringResource::kDefaultMemoryEstimate
334}
335unsafe extern "C" fn one_byte_const_estimate_shared_memory_usage(
336  _this: *const OneByteConst,
337  _recorder: *mut (),
338) {
339}
340
341type OneByteConstNoOp = unsafe extern "C" fn(*const OneByteConst);
342type OneByteConstIsCacheable =
343  unsafe extern "C" fn(*const OneByteConst) -> bool;
344type OneByteConstData =
345  unsafe extern "C" fn(*const OneByteConst) -> *const char;
346type OneByteConstLength = unsafe extern "C" fn(*const OneByteConst) -> usize;
347type OneByteConstUnaccount =
348  unsafe extern "C" fn(*const OneByteConst, *mut RealIsolate);
349type OneByteConstEstimateMemoryUsage =
350  unsafe extern "C" fn(*const OneByteConst) -> size_t;
351type OneByteConstEstimateSharedMemoryUsage =
352  unsafe extern "C" fn(*const OneByteConst, *mut ());
353
354#[repr(C)]
355struct OneByteConstVtable {
356  #[cfg(target_family = "windows")]
357  // In SysV / Itanium ABI -0x10 offset of the vtable
358  // tells how many bytes the vtable pointer pointing to
359  // this vtable is offset from the base class. For
360  // single inheritance this is always 0.
361  _offset_to_top: usize,
362  // In Itanium ABI the -0x08 offset contains the type_info
363  // pointer, and in MSVC it contains the RTTI Complete Object
364  // Locator pointer. V8 is normally compiled with `-fno-rtti`
365  // meaning that this pointer is a nullptr on both
366  // Itanium and MSVC.
367  _typeinfo: *const (),
368  // After the metadata fields come the virtual function
369  // pointers. The vtable pointer in a class instance points
370  // to the first virtual function pointer, making this
371  // the 0x00 offset of the table.
372  // The order of the virtual function pointers is determined
373  // by their order of declaration in the classes.
374  delete1: OneByteConstNoOp,
375  // In SysV / Itanium ABI, a class vtable includes the
376  // deleting destructor and the compete object destructor.
377  // In MSVC, it only includes the deleting destructor.
378  #[cfg(not(target_family = "windows"))]
379  delete2: OneByteConstNoOp,
380  is_cacheable: OneByteConstIsCacheable,
381  unaccount: OneByteConstUnaccount,
382  estimate_memory_usage: OneByteConstEstimateMemoryUsage,
383  estimate_shared_memory_usage: OneByteConstEstimateSharedMemoryUsage,
384  dispose: OneByteConstNoOp,
385  lock: OneByteConstNoOp,
386  unlock: OneByteConstNoOp,
387  data: OneByteConstData,
388  length: OneByteConstLength,
389}
390
391const ONE_BYTE_CONST_VTABLE: OneByteConstVtable = OneByteConstVtable {
392  #[cfg(target_family = "windows")]
393  _offset_to_top: 0,
394  _typeinfo: std::ptr::null(),
395  delete1: one_byte_const_no_op,
396  #[cfg(not(target_family = "windows"))]
397  delete2: one_byte_const_no_op,
398  is_cacheable: one_byte_const_is_cacheable,
399  unaccount: one_byte_const_unaccount,
400  estimate_memory_usage: one_byte_const_estimate_memory_usage,
401  estimate_shared_memory_usage: one_byte_const_estimate_shared_memory_usage,
402  dispose: one_byte_const_no_op,
403  lock: one_byte_const_no_op,
404  unlock: one_byte_const_no_op,
405  data: one_byte_const_data,
406  length: one_byte_const_length,
407};
408
/// Selects how a new string is created by the `new_from_*` constructors.
///
/// The value is passed by value across the FFI boundary, so the variant order
/// (and therefore the discriminants 0 and 1) must match the C++ side —
/// presumably `v8::NewStringType`; confirm against the V8 headers.
///
/// The type is a plain tag, so it is `Copy` and comparable; callers can reuse
/// one value for several constructor calls.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum NewStringType {
  /// The default kind; used by `String::new`.
  #[default]
  Normal,
  /// Requests an internalized string.
  Internalized,
}
416
417bitflags! {
418  #[derive(Clone, Copy, Default)]
419  #[repr(transparent)]
420  pub struct WriteOptions: int {
421    const NO_OPTIONS = 0;
422    const HINT_MANY_WRITES_EXPECTED = 1;
423    const NO_NULL_TERMINATION = 2;
424    const PRESERVE_ONE_BYTE_NULL = 4;
425    // Used by WriteUtf8 to replace orphan surrogate code units with the
426    // unicode replacement character. Needs to be set to guarantee valid UTF-8
427    // output.
428    const REPLACE_INVALID_UTF8 = 8;
429  }
430}
431
432bitflags! {
433  #[derive(Clone, Copy, Default)]
434  #[repr(transparent)]
435  pub struct WriteFlags: int {
436    const kNullTerminate = crate::binding::v8_String_WriteFlags_kNullTerminate as _;
437    const kReplaceInvalidUtf8 = crate::binding::v8_String_WriteFlags_kReplaceInvalidUtf8 as _;
438  }
439}
440
441impl String {
442  /// The maximum length (in bytes) of a buffer that a v8::String can be built
443  /// from. Attempting to create a v8::String from a larger buffer will result
444  /// in None being returned.
445  pub const MAX_LENGTH: usize = v8__String__kMaxLength as _;
446
447  #[inline(always)]
448  pub fn empty<'s>(scope: &PinScope<'s, '_, ()>) -> Local<'s, String> {
449    // FIXME(bnoordhuis) v8__String__Empty() is infallible so there
450    // is no need to box up the result, only to unwrap it again.
451    unsafe { scope.cast_local(|sd| v8__String__Empty(sd.get_isolate_ptr())) }
452      .unwrap()
453  }
454
455  /// Allocates a new string from UTF-8 data. Only returns an empty value when
456  /// length > kMaxLength
457  #[inline(always)]
458  pub fn new_from_utf8<'s>(
459    scope: &PinScope<'s, '_, ()>,
460    buffer: &[u8],
461    new_type: NewStringType,
462  ) -> Option<Local<'s, String>> {
463    if buffer.is_empty() {
464      return Some(Self::empty(scope));
465    }
466    let buffer_len = buffer.len().try_into().ok()?;
467    unsafe {
468      scope.cast_local(|sd| {
469        v8__String__NewFromUtf8(
470          sd.get_isolate_ptr(),
471          buffer.as_ptr() as *const char,
472          new_type,
473          buffer_len,
474        )
475      })
476    }
477  }
478
479  /// Allocates a new string from Latin-1 data.  Only returns an empty value when
480  /// length > kMaxLength.
481  #[inline(always)]
482  pub fn new_from_one_byte<'s>(
483    scope: &PinScope<'s, '_, ()>,
484    buffer: &[u8],
485    new_type: NewStringType,
486  ) -> Option<Local<'s, String>> {
487    let buffer_len = buffer.len().try_into().ok()?;
488    unsafe {
489      scope.cast_local(|sd| {
490        v8__String__NewFromOneByte(
491          sd.get_isolate_ptr(),
492          buffer.as_ptr(),
493          new_type,
494          buffer_len,
495        )
496      })
497    }
498  }
499
500  /// Allocates a new string from UTF-16 data. Only returns an empty value when
501  /// length > kMaxLength.
502  #[inline(always)]
503  pub fn new_from_two_byte<'s>(
504    scope: &PinScope<'s, '_, ()>,
505    buffer: &[u16],
506    new_type: NewStringType,
507  ) -> Option<Local<'s, String>> {
508    let buffer_len = buffer.len().try_into().ok()?;
509    unsafe {
510      scope.cast_local(|sd| {
511        v8__String__NewFromTwoByte(
512          sd.get_isolate_ptr(),
513          buffer.as_ptr(),
514          new_type,
515          buffer_len,
516        )
517      })
518    }
519  }
520
521  /// Returns the number of characters (UTF-16 code units) in this string.
522  #[inline(always)]
523  pub fn length(&self) -> usize {
524    unsafe { v8__String__Length(self) as usize }
525  }
526
527  /// Returns the number of bytes in the UTF-8 encoded representation of this
528  /// string.
529  #[inline(always)]
530  pub fn utf8_length(&self, scope: &Isolate) -> usize {
531    unsafe { v8__String__Utf8Length(self, scope.as_real_ptr()) as usize }
532  }
533
534  /// Writes the contents of the string to an external buffer, as 16-bit
535  /// (UTF-16) character codes.
536  #[inline(always)]
537  pub fn write_v2(
538    &self,
539    scope: &Isolate,
540    offset: u32,
541    buffer: &mut [u16],
542    flags: WriteFlags,
543  ) {
544    unsafe {
545      v8__String__Write_v2(
546        self,
547        scope.as_real_ptr(),
548        offset,
549        self.length().min(buffer.len()) as _,
550        buffer.as_mut_ptr(),
551        flags.bits(),
552      )
553    }
554  }
555
556  /// Writes the contents of the string to an external buffer, as one-byte
557  /// (Latin-1) characters.
558  #[inline(always)]
559  pub fn write_one_byte_v2(
560    &self,
561    scope: &Isolate,
562    offset: u32,
563    buffer: &mut [u8],
564    flags: WriteFlags,
565  ) {
566    unsafe {
567      v8__String__WriteOneByte_v2(
568        self,
569        scope.as_real_ptr(),
570        offset,
571        self.length().min(buffer.len()) as _,
572        buffer.as_mut_ptr(),
573        flags.bits(),
574      )
575    }
576  }
577
578  /// Writes the contents of the string to an external [`MaybeUninit`] buffer, as one-byte
579  /// (Latin-1) characters.
580  #[inline(always)]
581  pub fn write_one_byte_uninit_v2(
582    &self,
583    scope: &Isolate,
584    offset: u32,
585    buffer: &mut [MaybeUninit<u8>],
586    flags: WriteFlags,
587  ) {
588    unsafe {
589      v8__String__WriteOneByte_v2(
590        self,
591        scope.as_real_ptr(),
592        offset,
593        self.length().min(buffer.len()) as _,
594        buffer.as_mut_ptr() as _,
595        flags.bits(),
596      )
597    }
598  }
599
600  /// Writes the contents of the string to an external buffer, as UTF-8.
601  #[inline(always)]
602  pub fn write_utf8_v2(
603    &self,
604    scope: &Isolate,
605    buffer: &mut [u8],
606    flags: WriteFlags,
607    processed_characters_return: Option<&mut usize>,
608  ) -> usize {
609    unsafe {
610      // SAFETY:
611      // We assume that v8 will overwrite the buffer without de-initializing any byte in it.
612      // So the type casting of the buffer is safe.
613
614      let buffer = {
615        let len = buffer.len();
616        let data = buffer.as_mut_ptr().cast();
617        slice::from_raw_parts_mut(data, len)
618      };
619      self.write_utf8_uninit_v2(
620        scope,
621        buffer,
622        flags,
623        processed_characters_return,
624      )
625    }
626  }
627
628  /// Writes the contents of the string to an external [`MaybeUninit`] buffer, as UTF-8.
629  pub fn write_utf8_uninit_v2(
630    &self,
631    scope: &Isolate,
632    buffer: &mut [MaybeUninit<u8>],
633    flags: WriteFlags,
634    processed_characters_return: Option<&mut usize>,
635  ) -> usize {
636    let bytes = unsafe {
637      v8__String__WriteUtf8_v2(
638        self,
639        scope.as_real_ptr(),
640        buffer.as_mut_ptr() as _,
641        buffer.len(),
642        flags.bits(),
643        processed_characters_return
644          .map(|p| p as *mut _)
645          .unwrap_or(std::ptr::null_mut()),
646      )
647    };
648    bytes as usize
649  }
650
651  // Convenience function not present in the original V8 API.
652  #[inline(always)]
653  pub fn new<'s>(
654    scope: &PinScope<'s, '_, ()>,
655    value: &str,
656  ) -> Option<Local<'s, String>> {
657    Self::new_from_utf8(scope, value.as_ref(), NewStringType::Normal)
658  }
659
660  /// Compile-time function to create an external string resource.
661  /// The buffer is checked to contain only ASCII characters.
662  #[inline(always)]
663  pub const fn create_external_onebyte_const(
664    buffer: &'static [u8],
665  ) -> OneByteConst {
666    // Assert that the buffer contains only ASCII, and that the
667    // length is less or equal to (64-bit) v8::String::kMaxLength.
668    assert!(buffer.is_ascii() && buffer.len() <= ((1 << 29) - 24));
669    OneByteConst {
670      vtable: &ONE_BYTE_CONST_VTABLE.delete1,
671      cached_data: buffer.as_ptr() as *const char,
672      length: buffer.len(),
673    }
674  }
675
676  /// Compile-time function to create an external string resource which
677  /// skips the ASCII and length checks.
678  ///
679  /// ## Safety
680  ///
681  /// The passed in buffer must contain only ASCII data. Note that while V8
682  /// allows OneByte string resources to contain Latin-1 data, the OneByteConst
683  /// struct does not allow it.
684  #[inline(always)]
685  pub const unsafe fn create_external_onebyte_const_unchecked(
686    buffer: &'static [u8],
687  ) -> OneByteConst {
688    OneByteConst {
689      vtable: &ONE_BYTE_CONST_VTABLE.delete1,
690      cached_data: buffer.as_ptr() as *const char,
691      length: buffer.len(),
692    }
693  }
694
695  /// Creates a v8::String from a `&'static OneByteConst`
696  /// which is guaranteed to be ASCII.
697  ///
698  /// Note that OneByteConst guarantees ASCII even though V8 would allow
699  /// OneByte string resources to contain Latin-1.
700  #[inline(always)]
701  pub fn new_from_onebyte_const<'s>(
702    scope: &PinScope<'s, '_, ()>,
703    onebyte_const: &'static OneByteConst,
704  ) -> Option<Local<'s, String>> {
705    unsafe {
706      scope.cast_local(|sd| {
707        v8__String__NewExternalOneByteConst(sd.get_isolate_ptr(), onebyte_const)
708      })
709    }
710  }
711
712  /// Creates a v8::String from a `&'static [u8]`,
713  /// must be Latin-1 or ASCII, not UTF-8!
714  #[inline(always)]
715  pub fn new_external_onebyte_static<'s>(
716    scope: &PinScope<'s, '_, ()>,
717    buffer: &'static [u8],
718  ) -> Option<Local<'s, String>> {
719    let buffer_len = buffer.len().try_into().ok()?;
720    unsafe {
721      scope.cast_local(|sd| {
722        v8__String__NewExternalOneByteStatic(
723          sd.get_isolate_ptr(),
724          buffer.as_ptr() as *const char,
725          buffer_len,
726        )
727      })
728    }
729  }
730
731  /// Creates a `v8::String` from owned bytes.
732  /// The bytes must be Latin-1 or ASCII.
733  /// V8 will take ownership of the buffer and free it when the string is garbage collected.
734  #[inline(always)]
735  pub fn new_external_onebyte<'s>(
736    scope: &PinScope<'s, '_, ()>,
737    buffer: Box<[u8]>,
738  ) -> Option<Local<'s, String>> {
739    let buffer_len = buffer.len();
740    unsafe {
741      scope.cast_local(|sd| {
742        v8__String__NewExternalOneByte(
743          sd.get_isolate_ptr(),
744          Box::into_raw(buffer).cast::<char>(),
745          buffer_len,
746          free_rust_external_onebyte,
747        )
748      })
749    }
750  }
751
752  /// Creates a `v8::String` from owned bytes, length, and a custom destructor.
753  /// The bytes must be Latin-1 or ASCII.
754  /// V8 will take ownership of the buffer and free it when the string is garbage collected.
755  ///
756  /// SAFETY: `buffer` must be owned (valid for the lifetime of the string), and
757  /// `destructor` must be a valid function pointer that can free the buffer.
758  /// The destructor will be called with the buffer and length when the string is garbage collected.
759  #[inline(always)]
760  pub unsafe fn new_external_onebyte_raw<'s>(
761    scope: &PinScope<'s, '_, ()>,
762    buffer: *mut char,
763    buffer_len: usize,
764    destructor: unsafe extern "C" fn(*mut char, usize),
765  ) -> Option<Local<'s, String>> {
766    unsafe {
767      scope.cast_local(|sd| {
768        v8__String__NewExternalOneByte(
769          sd.get_isolate_ptr(),
770          buffer,
771          buffer_len,
772          destructor,
773        )
774      })
775    }
776  }
777
778  /// Creates a v8::String from a `&'static [u16]`.
779  #[inline(always)]
780  pub fn new_external_twobyte_static<'s>(
781    scope: &PinScope<'s, '_, ()>,
782    buffer: &'static [u16],
783  ) -> Option<Local<'s, String>> {
784    let buffer_len = buffer.len().try_into().ok()?;
785    unsafe {
786      scope.cast_local(|sd| {
787        v8__String__NewExternalTwoByteStatic(
788          sd.get_isolate_ptr(),
789          buffer.as_ptr(),
790          buffer_len,
791        )
792      })
793    }
794  }
795
796  /// Get the ExternalStringResource for an external string.
797  ///
798  /// Returns None if is_external() doesn't return true.
799  #[inline]
800  pub fn get_external_string_resource(
801    &self,
802  ) -> Option<NonNull<ExternalStringResource>> {
803    NonNull::new(unsafe { v8__String__GetExternalStringResource(self) })
804  }
805
806  /// Get the ExternalOneByteStringResource for an external one-byte string.
807  ///
808  /// Returns None if is_external_onebyte() doesn't return true.
809  #[inline]
810  pub fn get_external_onebyte_string_resource(
811    &self,
812  ) -> Option<NonNull<ExternalOneByteStringResource>> {
813    let (base, encoding) = self.get_external_string_resource_base();
814    let base = base?;
815    if encoding != Encoding::OneByte {
816      return None;
817    }
818
819    Some(base.cast())
820  }
821
822  /// Get the ExternalStringResourceBase for an external string.
823  /// Note this is just the base class, and isn't very useful on its own.
824  /// You'll want to downcast to one of its subclasses, for instance
825  /// with `get_external_onebyte_string_resource`.
826  pub fn get_external_string_resource_base(
827    &self,
828  ) -> (Option<NonNull<ExternalStringResourceBase>>, Encoding) {
829    let mut encoding = Encoding::Unknown;
830    (
831      NonNull::new(unsafe {
832        v8__String__GetExternalStringResourceBase(self, &mut encoding)
833      }),
834      encoding,
835    )
836  }
837
838  /// True if string is external
839  #[inline(always)]
840  pub fn is_external(&self) -> bool {
841    // TODO: re-enable on next v8-release
842    // Right now it fallbacks to Value::IsExternal, which is incorrect
843    // See: https://source.chromium.org/chromium/_/chromium/v8/v8.git/+/1dd8624b524d14076160c1743f7da0b20fbe68e0
844    // unsafe { v8__String__IsExternal(self) }
845
846    // Fallback for now (though functionally identical)
847    self.is_external_onebyte() || self.is_external_twobyte()
848  }
849
850  /// True if string is external & one-byte
851  /// (e.g: created with new_external_onebyte_static)
852  #[inline(always)]
853  pub fn is_external_onebyte(&self) -> bool {
854    unsafe { v8__String__IsExternalOneByte(self) }
855  }
856
857  /// True if string is external & two-byte
858  /// (e.g: created with new_external_twobyte_static)
859  #[inline(always)]
860  pub fn is_external_twobyte(&self) -> bool {
861    unsafe { v8__String__IsExternalTwoByte(self) }
862  }
863
864  /// Will return true if and only if string is known for certain to contain only one-byte data,
865  /// ie: Latin-1, a.k.a. ISO-8859-1 code points. Doesn't read the string so can return false
866  /// negatives, and a return value of false does not mean this string is not one-byte data.
867  ///
868  /// For a method that will not return false negatives at the cost of
869  /// potentially reading the entire string, use [`contains_only_onebyte()`].
870  ///
871  /// [`contains_only_onebyte()`]: String::contains_only_onebyte
872  #[inline(always)]
873  pub fn is_onebyte(&self) -> bool {
874    unsafe { v8__String__IsOneByte(self) }
875  }
876
877  /// True if the string contains only one-byte data.
878  /// Will read the entire string in some cases.
879  #[inline(always)]
880  pub fn contains_only_onebyte(&self) -> bool {
881    unsafe { v8__String__ContainsOnlyOneByte(self) }
882  }
883
884  /// Creates a copy of a [`crate::String`] in a [`std::string::String`].
885  /// Convenience function not present in the original V8 API.
886  pub fn to_rust_string_lossy(&self, scope: &Isolate) -> std::string::String {
887    let len_utf16 = self.length();
888
889    // No need to allocate or do any work for zero-length strings
890    if len_utf16 == 0 {
891      return std::string::String::new();
892    }
893
894    let len_utf8 = self.utf8_length(scope);
895
896    // If len_utf8 == len_utf16 and the string is one-byte, we can take the fast memcpy path. This is true iff the
897    // string is 100% 7-bit ASCII.
898    if self.is_onebyte() && len_utf8 == len_utf16 {
899      unsafe {
900        // Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
901        // accidentally creating a slice of u8 which would be invalid.
902        let layout = std::alloc::Layout::from_size_align(len_utf16, 1).unwrap();
903        let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
904        let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf16);
905
906        // Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
907        self.write_one_byte_uninit_v2(
908          scope,
909          0,
910          &mut *buffer,
911          WriteFlags::kReplaceInvalidUtf8,
912        );
913
914        // Return an owned string from this guaranteed now-initialized data
915        let buffer = data as *mut u8;
916        return std::string::String::from_raw_parts(
917          buffer, len_utf16, len_utf16,
918        );
919      }
920    }
921
922    // SAFETY: This allocates a buffer manually using the default allocator using the string's capacity.
923    // We have a large number of invariants to uphold, so please check changes to this code carefully
924    unsafe {
925      // Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
926      // accidentally creating a slice of u8 which would be invalid.
927      let layout = std::alloc::Layout::from_size_align(len_utf8, 1).unwrap();
928      let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
929      let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf8);
930
931      // Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
932      let length = self.write_utf8_uninit_v2(
933        scope,
934        &mut *buffer,
935        WriteFlags::kReplaceInvalidUtf8,
936        None,
937      );
938      debug_assert!(length == len_utf8);
939
940      // Return an owned string from this guaranteed now-initialized data
941      let buffer = data as *mut u8;
942      std::string::String::from_raw_parts(buffer, length, len_utf8)
943    }
944  }
945
946  /// Writes the UTF-8 representation of this string into an existing
947  /// [`std::string::String`], reusing its allocation.
948  ///
949  /// The buffer is cleared first, then filled with the string's UTF-8
950  /// contents. This avoids repeated heap allocation when converting
951  /// many V8 strings — callers can keep a single `String` and reuse it.
952  ///
953  /// Uses [`ValueView`] internally for single-pass access, avoiding
954  /// the extra `utf8_length` FFI call.
955  pub fn write_utf8_into(
956    &self,
957    scope: &mut Isolate,
958    buf: &mut std::string::String,
959  ) {
960    buf.clear();
961    let len = self.length();
962    if len == 0 {
963      return;
964    }
965
966    // SAFETY: `self` is a valid V8 string reachable from a handle scope.
967    // The ValueView is dropped before we return.
968    let view = unsafe { ValueView::new_from_ref(scope, self) };
969
970    match view.data() {
971      ValueViewData::OneByte(bytes) => {
972        if bytes.is_ascii() {
973          // ASCII: direct copy, already valid UTF-8.
974          buf.reserve(bytes.len());
975          unsafe {
976            let vec = buf.as_mut_vec();
977            std::ptr::copy_nonoverlapping(
978              bytes.as_ptr(),
979              vec.as_mut_ptr(),
980              bytes.len(),
981            );
982            vec.set_len(bytes.len());
983          }
984        } else {
985          // Latin-1: each byte can expand to at most 2 UTF-8 bytes.
986          let max_utf8_len = bytes.len() * 2;
987          buf.reserve(max_utf8_len);
988          unsafe {
989            let vec = buf.as_mut_vec();
990            let written =
991              latin1_to_utf8(bytes.len(), bytes.as_ptr(), vec.as_mut_ptr());
992            vec.set_len(written);
993          }
994        }
995      }
996      ValueViewData::TwoByte(units) => {
997        // Conservative estimate: each UTF-16 code unit can produce at
998        // most 3 UTF-8 bytes (surrogates produce 4 bytes for 2 units).
999        buf.reserve(units.len() * 3);
1000        for result in std::char::decode_utf16(units.iter().copied()) {
1001          let c = result.unwrap_or('\u{FFFD}');
1002          let mut tmp = [0u8; 4];
1003          buf.push_str(c.encode_utf8(&mut tmp));
1004        }
1005      }
1006    }
1007  }
1008
1009  /// Converts a [`crate::String`] to either an owned [`std::string::String`],
1010  /// or a borrowed [`str`], depending on whether it fits into the provided
1011  /// buffer.
1012  ///
1013  /// Uses [`ValueView`] internally for direct access to the string's
1014  /// contents, eliminating the `utf8_length` pre-scan that the previous
1015  /// implementation required.
1016  pub fn to_rust_cow_lossy<'a, const N: usize>(
1017    &self,
1018    scope: &mut Isolate,
1019    buffer: &'a mut [MaybeUninit<u8>; N],
1020  ) -> Cow<'a, str> {
1021    let len = self.length();
1022    if len == 0 {
1023      return "".into();
1024    }
1025
1026    // SAFETY: `self` is a valid V8 string reachable from a handle scope.
1027    // The ValueView is dropped before we return, so the
1028    // DisallowGarbageCollection scope it holds is properly scoped.
1029    let view = unsafe { ValueView::new_from_ref(scope, self) };
1030
1031    match view.data() {
1032      ValueViewData::OneByte(bytes) => {
1033        if bytes.is_ascii() {
1034          // ASCII: direct memcpy, no transcoding needed.
1035          if bytes.len() <= N {
1036            unsafe {
1037              std::ptr::copy_nonoverlapping(
1038                bytes.as_ptr(),
1039                buffer.as_mut_ptr() as *mut u8,
1040                bytes.len(),
1041              );
1042              let buf = &mut buffer[..bytes.len()];
1043              let buf = &mut *(buf as *mut [_] as *mut [u8]);
1044              Cow::Borrowed(std::str::from_utf8_unchecked(buf))
1045            }
1046          } else {
1047            // SAFETY: ASCII bytes are valid UTF-8.
1048            unsafe {
1049              Cow::Owned(std::string::String::from_utf8_unchecked(
1050                bytes.to_vec(),
1051              ))
1052            }
1053          }
1054        } else {
1055          // Latin-1 non-ASCII: each byte can expand to at most 2 UTF-8
1056          // bytes. Use conservative size check.
1057          let max_utf8_len = bytes.len() * 2;
1058          if max_utf8_len <= N {
1059            let written = unsafe {
1060              latin1_to_utf8(
1061                bytes.len(),
1062                bytes.as_ptr(),
1063                buffer.as_mut_ptr() as *mut u8,
1064              )
1065            };
1066            unsafe {
1067              let buf = &mut buffer[..written];
1068              let buf = &mut *(buf as *mut [_] as *mut [u8]);
1069              Cow::Borrowed(std::str::from_utf8_unchecked(buf))
1070            }
1071          } else {
1072            let mut buf = Vec::with_capacity(max_utf8_len);
1073            unsafe {
1074              let written =
1075                latin1_to_utf8(bytes.len(), bytes.as_ptr(), buf.as_mut_ptr());
1076              buf.set_len(written);
1077              Cow::Owned(std::string::String::from_utf8_unchecked(buf))
1078            }
1079          }
1080        }
1081      }
1082      ValueViewData::TwoByte(units) => {
1083        // Transcode UTF-16 directly into the stack buffer when possible.
1084        let mut pos = 0;
1085        let mut tmp = [0u8; 4];
1086        let mut all_fit = true;
1087        for result in std::char::decode_utf16(units.iter().copied()) {
1088          let c = result.unwrap_or('\u{FFFD}');
1089          let encoded = c.encode_utf8(&mut tmp);
1090          if pos + encoded.len() > N {
1091            all_fit = false;
1092            break;
1093          }
1094          unsafe {
1095            std::ptr::copy_nonoverlapping(
1096              encoded.as_ptr(),
1097              (buffer.as_mut_ptr() as *mut u8).add(pos),
1098              encoded.len(),
1099            );
1100          }
1101          pos += encoded.len();
1102        }
1103        if all_fit {
1104          unsafe {
1105            let buf = &mut buffer[..pos];
1106            let buf = &mut *(buf as *mut [_] as *mut [u8]);
1107            Cow::Borrowed(std::str::from_utf8_unchecked(buf))
1108          }
1109        } else {
1110          Cow::Owned(std::string::String::from_utf16_lossy(units))
1111        }
1112      }
1113    }
1114  }
1115}
1116
/// Releases a buffer of `len` elements starting at `s` by rebuilding the
/// boxed slice it came from and dropping it.
///
/// # Safety
///
/// `s` and `len` must describe an allocation that was produced by leaking
/// a `Box<[char]>` of exactly `len` elements, and that allocation must not
/// be used or freed again afterwards.
#[inline]
pub unsafe extern "C" fn free_rust_external_onebyte(s: *mut char, len: usize) {
  // Reassemble the fat pointer for the slice.
  let raw_slice = std::ptr::slice_from_raw_parts_mut(s, len);

  // SAFETY: per this function's contract, `raw_slice` denotes a leaked
  // boxed slice; taking ownership back and dropping it frees the memory.
  unsafe {
    drop(Box::from_raw(raw_slice));
  }
}
1126
/// The raw contents of a V8 string, as handed out by [`ValueView::data`].
///
/// Both variants merely borrow the underlying storage, so the enum is
/// trivially copyable and supports total equality.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValueViewData<'s> {
  /// One byte per character. Non-ASCII bytes are treated as Latin-1 by the
  /// conversion helpers in this file.
  OneByte(&'s [u8]),
  /// Two bytes per character. The conversion helpers in this file decode
  /// these units as UTF-16.
  TwoByte(&'s [u16]),
}
1132
/// A view onto a V8 string's contents.
///
/// WARNING: This does not copy the string's contents, and will therefore be
/// invalidated if the GC can move the string while the `ValueView` is alive.
/// It is therefore required that no GC or allocation can happen while there
/// is an active `ValueView`. This requirement may be relaxed in the future.
///
/// V8 strings are encoded with either one byte or two bytes per character;
/// [`ValueView::data`] reports which encoding a given view exposes.
#[repr(C)]
pub struct ValueView<'s>(
  // Opaque storage that the C++ side constructs in place and destroys on
  // drop; its size comes from the generated bindings.
  // NOTE(review): a `[u8; N]` field only has alignment 1 — confirm the C++
  // object tolerates wherever this storage ends up being placed.
  [u8; crate::binding::v8__String__ValueView_SIZE],
  // Ties the view to the `'s` lifetime without storing a reference.
  PhantomData<&'s ()>,
);
1146
1147impl<'s> ValueView<'s> {
1148  #[inline(always)]
1149  pub fn new(isolate: &mut Isolate, string: Local<'s, String>) -> Self {
1150    // SAFETY: Local<'s, String> guarantees the V8 string is rooted in a
1151    // HandleScope that lives for at least 's.  Deref on Local erases the
1152    // scope lifetime, so we recover it via pointer cast.
1153    let string_ref: &'s String = unsafe { &*((&*string) as *const String) };
1154    unsafe { Self::new_from_ref(isolate, string_ref) }
1155  }
1156
1157  /// Constructs a `ValueView` from a raw string reference.
1158  ///
1159  /// # Safety
1160  ///
1161  /// The caller must ensure that `string` is a valid V8 string that
1162  /// remains alive for at least `'s`. In practice this means the
1163  /// string must be reachable from a handle scope that outlives the
1164  /// returned `ValueView`.
1165  #[inline(always)]
1166  pub(crate) unsafe fn new_from_ref(
1167    isolate: &mut Isolate,
1168    string: &'s String,
1169  ) -> Self {
1170    let mut v = std::mem::MaybeUninit::uninit();
1171    unsafe {
1172      v8__String__ValueView__CONSTRUCT(
1173        v.as_mut_ptr(),
1174        isolate.as_real_ptr(),
1175        string,
1176      );
1177      v.assume_init()
1178    }
1179  }
1180
1181  #[inline(always)]
1182  pub fn data(&self) -> ValueViewData<'_> {
1183    unsafe {
1184      let data = v8__String__ValueView__data(self);
1185      let length = v8__String__ValueView__length(self) as usize;
1186      if v8__String__ValueView__is_one_byte(self) {
1187        ValueViewData::OneByte(std::slice::from_raw_parts(data as _, length))
1188      } else {
1189        ValueViewData::TwoByte(std::slice::from_raw_parts(data as _, length))
1190      }
1191    }
1192  }
1193
1194  /// Returns a zero-copy `&str` if the string is one-byte and pure ASCII.
1195  ///
1196  /// This is the fastest way to access a V8 string's contents as a Rust
1197  /// `&str` — no allocation, no copy, no transcoding. Returns `None` for
1198  /// strings that contain non-ASCII Latin-1 bytes or are two-byte encoded.
1199  ///
1200  /// The returned reference is valid as long as this `ValueView` is alive.
1201  #[inline(always)]
1202  pub fn as_str(&self) -> Option<&str> {
1203    match self.data() {
1204      ValueViewData::OneByte(bytes) => {
1205        if bytes.is_ascii() {
1206          // SAFETY: ASCII bytes are valid UTF-8.
1207          Some(unsafe { std::str::from_utf8_unchecked(bytes) })
1208        } else {
1209          None
1210        }
1211      }
1212      ValueViewData::TwoByte(_) => None,
1213    }
1214  }
1215
1216  /// Returns the string contents as a `Cow<str>`.
1217  ///
1218  /// - **One-byte ASCII**: returns `Cow::Borrowed(&str)` — true zero-copy.
1219  /// - **One-byte Latin-1** (non-ASCII): transcodes to UTF-8, returns
1220  ///   `Cow::Owned`.
1221  /// - **Two-byte** (UTF-16): transcodes to UTF-8 via
1222  ///   [`std::string::String::from_utf16_lossy`], returns `Cow::Owned`.
1223  ///
1224  /// For the common case of ASCII strings this is zero-copy. The
1225  /// Latin-1 transcoding uses a SIMD-friendly loop that processes 8 bytes
1226  /// at a time.
1227  #[inline(always)]
1228  pub fn to_cow_lossy(&self) -> Cow<'_, str> {
1229    match self.data() {
1230      ValueViewData::OneByte(bytes) => {
1231        if bytes.is_ascii() {
1232          // SAFETY: ASCII bytes are valid UTF-8.
1233          Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(bytes) })
1234        } else {
1235          // Latin-1 → UTF-8 transcoding. Each byte can expand to at
1236          // most 2 UTF-8 bytes.
1237          let mut buf = Vec::with_capacity(bytes.len() * 2);
1238          // SAFETY: buf has capacity >= bytes.len() * 2, and
1239          // latin1_to_utf8 writes valid UTF-8.
1240          unsafe {
1241            let written =
1242              latin1_to_utf8(bytes.len(), bytes.as_ptr(), buf.as_mut_ptr());
1243            buf.set_len(written);
1244            Cow::Owned(std::string::String::from_utf8_unchecked(buf))
1245          }
1246        }
1247      }
1248      ValueViewData::TwoByte(units) => {
1249        Cow::Owned(std::string::String::from_utf16_lossy(units))
1250      }
1251    }
1252  }
1253}
1254
1255impl Drop for ValueView<'_> {
1256  fn drop(&mut self) {
1257    unsafe { v8__String__ValueView__DESTRUCT(self) }
1258  }
1259}
v8/string.rs

v8/
string.rs