rw_deno_core/
fast_string.rs

1// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
2
3use serde::Deserializer;
4use serde::Serializer;
5use std::borrow::Borrow;
6use std::ffi::OsStr;
7use std::fmt::Debug;
8use std::fmt::Display;
9use std::fmt::Formatter;
10use std::hash::Hash;
11use std::ops::Deref;
12use std::sync::Arc;
13use url::Url;
14use v8::NewStringType;
15
16static EMPTY_STRING: v8::OneByteConst =
17  v8::String::create_external_onebyte_const("".as_bytes());
18
19/// A static string that is compile-time checked to be ASCII and is stored in the
20/// most efficient possible way to create V8 strings.
21#[derive(Clone, Copy)]
22#[repr(transparent)]
23pub struct FastStaticString {
24  s: &'static v8::OneByteConst,
25}
26
27impl FastStaticString {
28  pub const fn new(s: &'static v8::OneByteConst) -> Self {
29    FastStaticString { s }
30  }
31
32  pub const fn as_str(&self) -> &'static str {
33    // SAFETY: We know this was constructed from a valid one-byte string.
34    unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
35  }
36
37  pub const fn as_bytes(&self) -> &'static [u8] {
38    unsafe {
39      let ptrs: [usize; 3] = std::ptr::read(self.s as *const _ as *const _);
40      std::slice::from_raw_parts(ptrs[1] as *const u8, ptrs[2])
41    }
42  }
43
44  // TODO(mmastrac): This is a workaround for a compiler error for large scripts that take too
45  // long to call is_ascii on.
46  #[doc(hidden)]
47  pub const fn create_external_onebyte_const(
48    s: &'static [u8],
49  ) -> v8::OneByteConst {
50    #[repr(C)]
51    struct OneByteConst {
52      _vtable: *const (),
53      cached_data: *const char,
54      length: usize,
55    }
56    // SAFETY: Workaround compiler error in create_external_onebyte_const for long ascii strings
57    unsafe {
58      debug_assert!(s.is_ascii());
59      let c = v8::String::create_external_onebyte_const(&[]);
60      let mut ptrs: OneByteConst = std::mem::transmute(c);
61      ptrs.cached_data = std::mem::transmute(s.as_ptr());
62      ptrs.length = s.len();
63      std::mem::transmute(ptrs)
64    }
65  }
66
67  pub fn v8_string<'s>(
68    &self,
69    scope: &mut v8::HandleScope<'s>,
70  ) -> v8::Local<'s, v8::String> {
71    FastString::from(*self).v8_string(scope)
72  }
73
74  pub const fn into_v8_const_ptr(&self) -> *const v8::OneByteConst {
75    self.s as _
76  }
77}
78
79impl From<&'static v8::OneByteConst> for FastStaticString {
80  fn from(s: &'static v8::OneByteConst) -> Self {
81    Self::new(s)
82  }
83}
84
85impl From<FastStaticString> for *const v8::OneByteConst {
86  fn from(val: FastStaticString) -> Self {
87    val.into_v8_const_ptr()
88  }
89}
90
91impl Hash for FastStaticString {
92  fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
93    self.as_str().hash(state)
94  }
95}
96
97impl AsRef<str> for FastStaticString {
98  fn as_ref(&self) -> &str {
99    self.as_str()
100  }
101}
102
103impl Deref for FastStaticString {
104  type Target = str;
105  fn deref(&self) -> &Self::Target {
106    self.as_str()
107  }
108}
109
110impl Borrow<str> for FastStaticString {
111  fn borrow(&self) -> &str {
112    self.as_str()
113  }
114}
115
116impl Debug for FastStaticString {
117  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
118    Debug::fmt(self.as_str(), f)
119  }
120}
121
122impl Default for FastStaticString {
123  fn default() -> Self {
124    FastStaticString { s: &EMPTY_STRING }
125  }
126}
127
128impl PartialEq for FastStaticString {
129  fn eq(&self, other: &Self) -> bool {
130    self.as_bytes() == other.as_bytes()
131  }
132}
133
134impl Eq for FastStaticString {}
135
136impl Display for FastStaticString {
137  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
138    f.write_str(self.as_str())
139  }
140}
141
142/// Module names and code can be sourced from strings or bytes that are either owned or borrowed. This enumeration allows us
143/// to perform a minimal amount of cloning and format-shifting of the underlying data.
144///
145/// Note that any [`FastString`] created using [`ascii_str!`] must contain only ASCII characters. Other [`FastString`] types
146/// may be UTF-8, though this will incur a small performance penalty. It is recommended that large, static strings always
147/// use [`ascii_str!`].
148///
149/// Examples of ways to construct a [`FastString`]:
150///
151/// ```rust
152/// # use deno_core::{ascii_str, FastString};
153///
154/// let code: FastString = ascii_str!("a string").into();
155/// let code: FastString = format!("a string").into();
156/// ```
157pub struct FastString {
158  inner: FastStringInner,
159}
160
161enum FastStringInner {
162  /// Created from static data.
163  Static(&'static str),
164
165  /// Created from static ascii, known to contain only ASCII chars.
166  StaticAscii(&'static str),
167
168  /// Created from static data, known to contain only ASCII chars.
169  StaticConst(FastStaticString),
170
171  /// An owned chunk of data. Note that we use `Box` rather than `Vec` to avoid the
172  /// storage overhead.
173  Owned(Box<str>),
174
175  // Scripts loaded from the `deno_graph` infrastructure.
176  Arc(Arc<str>),
177}
178
179impl FastString {
180  /// Create a [`FastString`] from a static string. The string may contain non-ASCII characters, and if
181  /// so, will take the slower path when used in v8.
182  pub const fn from_static(s: &'static str) -> Self {
183    if s.is_ascii() {
184      Self {
185        inner: FastStringInner::StaticAscii(s),
186      }
187    } else {
188      Self {
189        inner: FastStringInner::Static(s),
190      }
191    }
192  }
193
194  /// Returns a static string from this `FastString`, if available.
195  pub fn as_static_str(&self) -> Option<&'static str> {
196    match self.inner {
197      FastStringInner::Static(s) => Some(s),
198      FastStringInner::StaticAscii(s) => Some(s),
199      FastStringInner::StaticConst(s) => Some(s.as_str()),
200      _ => None,
201    }
202  }
203
204  /// Creates a cheap copy of this [`FastString`], potentially transmuting it to a faster form. Note that this
205  /// is not a clone operation as it consumes the old [`FastString`].
206  pub fn into_cheap_copy(self) -> (Self, Self) {
207    match self.inner {
208      FastStringInner::Owned(s) => {
209        let s: Arc<str> = s.into();
210        (
211          Self {
212            inner: FastStringInner::Arc(s.clone()),
213          },
214          Self {
215            inner: FastStringInner::Arc(s),
216          },
217        )
218      }
219      _ => (self.try_clone().unwrap(), self),
220    }
221  }
222
223  /// If this [`FastString`] is cheaply cloneable, returns a clone.
224  pub fn try_clone(&self) -> Option<Self> {
225    match &self.inner {
226      FastStringInner::Static(s) => Some(Self {
227        inner: FastStringInner::Static(s),
228      }),
229      FastStringInner::StaticAscii(s) => Some(Self {
230        inner: FastStringInner::StaticAscii(s),
231      }),
232      FastStringInner::StaticConst(s) => Some(Self {
233        inner: FastStringInner::StaticConst(*s),
234      }),
235      FastStringInner::Arc(s) => Some(Self {
236        inner: FastStringInner::Arc(s.clone()),
237      }),
238      FastStringInner::Owned(_s) => None,
239    }
240  }
241
242  #[inline(always)]
243  pub fn as_bytes(&self) -> &[u8] {
244    self.as_str().as_bytes()
245  }
246
247  #[inline(always)]
248  pub fn as_str(&self) -> &str {
249    match &self.inner {
250      // TODO(mmastrac): When we get a const deref, as_str can be const
251      FastStringInner::Arc(s) => s,
252      FastStringInner::Owned(s) => s,
253      FastStringInner::Static(s) => s,
254      FastStringInner::StaticAscii(s) => s,
255      FastStringInner::StaticConst(s) => s.as_str(),
256    }
257  }
258
259  /// Create a v8 string from this [`FastString`]. If the string is static and contains only ASCII characters,
260  /// an external one-byte static is created.
261  pub fn v8_string<'a>(
262    &self,
263    scope: &mut v8::HandleScope<'a>,
264  ) -> v8::Local<'a, v8::String> {
265    match self.inner {
266      FastStringInner::StaticAscii(s) => {
267        v8::String::new_external_onebyte_static(scope, s.as_bytes()).unwrap()
268      }
269      FastStringInner::StaticConst(s) => {
270        v8::String::new_from_onebyte_const(scope, s.s).unwrap()
271      }
272      _ => {
273        v8::String::new_from_utf8(scope, self.as_bytes(), NewStringType::Normal)
274          .unwrap()
275      }
276    }
277  }
278
279  /// Truncates a [`FastString`] value, possibly re-allocating or memcpy'ing. May be slow.
280  pub fn truncate(&mut self, index: usize) {
281    match &mut self.inner {
282      FastStringInner::Static(b) => {
283        self.inner = FastStringInner::Static(&b[..index])
284      }
285      FastStringInner::StaticAscii(b) => {
286        self.inner = FastStringInner::StaticAscii(&b[..index])
287      }
288      FastStringInner::StaticConst(b) => {
289        self.inner = FastStringInner::StaticAscii(&b.as_str()[..index])
290      }
291      // TODO(mmastrac): this could be more efficient
292      FastStringInner::Owned(b) => {
293        self.inner = FastStringInner::Owned(b[..index].to_owned().into())
294      }
295      // We can't do much if we have an Arc<str>, so we'll just take ownership of the truncated version
296      FastStringInner::Arc(s) => {
297        self.inner = FastStringInner::Arc(s[..index].to_owned().into())
298      }
299    }
300  }
301}
302
303impl Hash for FastString {
304  fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
305    self.as_str().hash(state)
306  }
307}
308
309impl AsRef<str> for FastString {
310  fn as_ref(&self) -> &str {
311    self.as_str()
312  }
313}
314
315impl AsRef<[u8]> for FastString {
316  fn as_ref(&self) -> &[u8] {
317    self.as_str().as_ref()
318  }
319}
320
321impl AsRef<OsStr> for FastString {
322  fn as_ref(&self) -> &OsStr {
323    self.as_str().as_ref()
324  }
325}
326
327impl Deref for FastString {
328  type Target = str;
329  fn deref(&self) -> &Self::Target {
330    self.as_str()
331  }
332}
333
334impl Borrow<str> for FastString {
335  fn borrow(&self) -> &str {
336    self.as_str()
337  }
338}
339
340impl Debug for FastString {
341  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
342    Debug::fmt(self.as_str(), f)
343  }
344}
345
346impl Default for FastString {
347  fn default() -> Self {
348    Self {
349      inner: FastStringInner::StaticConst(FastStaticString::default()),
350    }
351  }
352}
353
354impl PartialEq for FastString {
355  fn eq(&self, other: &Self) -> bool {
356    self.as_bytes() == other.as_bytes()
357  }
358}
359
360impl Eq for FastString {}
361
362impl Display for FastString {
363  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
364    f.write_str(self.as_str())
365  }
366}
367
368/// [`FastString`] can be made cheaply from [`Url`] as we know it's owned and don't need to do an
369/// ASCII check.
370impl From<FastStaticString> for FastString {
371  fn from(value: FastStaticString) -> Self {
372    Self {
373      inner: FastStringInner::StaticConst(value),
374    }
375  }
376}
377
378/// [`FastString`] can be made cheaply from [`Url`] as we know it's owned and don't need to do an
379/// ASCII check.
380impl From<Url> for FastString {
381  fn from(value: Url) -> Self {
382    let s: String = value.into();
383    s.into()
384  }
385}
386
387/// [`FastString`] can be made cheaply from [`String`] as we know it's owned and don't need to do an
388/// ASCII check.
389impl From<String> for FastString {
390  fn from(value: String) -> Self {
391    Self {
392      inner: FastStringInner::Owned(value.into_boxed_str()),
393    }
394  }
395}
396
397/// [`FastString`] can be made cheaply from [`Arc<str>`] as we know it's shared and don't need to do an
398/// ASCII check.
399impl From<Arc<str>> for FastString {
400  fn from(value: Arc<str>) -> Self {
401    Self {
402      inner: FastStringInner::Arc(value),
403    }
404  }
405}
406
407impl serde::Serialize for FastString {
408  fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
409  where
410    S: Serializer,
411  {
412    serializer.serialize_str(self.as_str())
413  }
414}
415
416type DeserializeProxy<'de> = &'de str;
417
418impl<'de> serde::Deserialize<'de> for FastString {
419  fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
420  where
421    D: Deserializer<'de>,
422  {
423    DeserializeProxy::<'de>::deserialize(deserializer)
424      .map(|v| v.to_owned().into())
425  }
426}
427
/// Embed the contents of a file in the binary as a fast string. The contents are asserted at
/// compile-time to be 7-bit ASCII for optimal v8 performance.
///
/// The result is a [`FastStaticString`], which may be converted to a [`FastString`] via [`Into::into`].
#[macro_export]
macro_rules! ascii_str_include {
  ($file:expr) => {{
    const INCLUDED: $crate::v8::OneByteConst =
      $crate::FastStaticString::create_external_onebyte_const(
        ::std::include_str!($file).as_bytes(),
      );
    let included: &'static $crate::v8::OneByteConst = &INCLUDED;
    $crate::FastStaticString::new(included)
  }};
}
443
/// Include a fast string in the binary from a string literal. This string is asserted at compile-time to be
/// 7-bit ASCII for optimal v8 performance.
///
/// This macro creates a [`FastStaticString`] that may be converted to a [`FastString`] via [`Into::into`].
///
/// The expansion contains no `unsafe` code, so it can be used from crates that forbid it.
#[macro_export]
macro_rules! ascii_str {
  ($str:expr) => {{
    const C: $crate::v8::OneByteConst =
      $crate::FastStaticString::create_external_onebyte_const($str.as_bytes());
    let s: &'static $crate::v8::OneByteConst = &C;
    $crate::FastStaticString::new(s)
  }};
}
456
/// Produces the `(name, FastStaticString)` pair holding the fast, const version of an op name.
/// Internal only.
#[macro_export]
#[doc(hidden)]
macro_rules! __op_name_fast {
  ($op:ident) => {{
    const NAME: &'static str = stringify!($op);
    const ONE_BYTE: $crate::v8::OneByteConst =
      $crate::FastStaticString::create_external_onebyte_const(NAME.as_bytes());
    let one_byte: &'static $crate::v8::OneByteConst = &ONE_BYTE;
    (NAME, $crate::FastStaticString::new(one_byte))
  }};
}
469
#[cfg(test)]
mod tests {
  use super::*;

  // TODO(mmastrac): we need to upstream as_str() on OneByteConst to rusty_v8 to get
  // rid of the hacks that break miri.
  /// Strings built in different ways compare equal and survive `into_cheap_copy`.
  #[cfg(not(miri))]
  #[test]
  fn string_eq() {
    let from_macro: FastString = ascii_str!("Testing").into();
    assert_eq!("Testing", from_macro.as_str());
    let from_static = FastString::from_static("Testing");
    assert_eq!(from_macro, from_static);
    let (first, second) = from_macro.into_cheap_copy();
    assert_eq!("Testing", first.as_str());
    assert_eq!("Testing", second.as_str());

    let owned = FastString::from("Testing".to_owned());
    assert_eq!("Testing", owned.as_str());
    let (first, second) = owned.into_cheap_copy();
    assert_eq!("Testing", first.as_str());
    assert_eq!("Testing", second.as_str());
  }

  /// Truncation matches `String::truncate` for static, owned and shared strings.
  #[cfg(not(miri))]
  #[test]
  fn truncate() {
    let mut expected = "123456".to_owned();
    expected.truncate(3);

    let mut from_macro: FastString = ascii_str!("123456").into();
    from_macro.truncate(3);
    assert_eq!(expected, from_macro.as_str());

    let mut owned: FastString = "123456".to_owned().into();
    owned.truncate(3);
    assert_eq!(expected, owned.as_str());

    let shared_source: Arc<str> = "123456".into();
    let mut shared: FastString = shared_source.into();
    shared.truncate(3);
    assert_eq!(expected, shared.as_str());
  }

  #[test]
  fn test_large_include() {
    // This test would require an excessively large file in the repo, so we just run this manually
    // ascii_str_include!("runtime/tests/large_string.txt");
    // ascii_str_include!(concat!("runtime", "/tests/", "large_string.txt"));
  }

  /// Ensure that all of our macros compile properly in a static context.
  #[test]
  fn test_const() {
    const _: (&str, FastStaticString) = __op_name_fast!(op_name);
    const _: FastStaticString = ascii_str!("hmm");
    const _: FastStaticString = ascii_str!(concat!("hmm", "hmmmmm"));
    const _: FastStaticString = ascii_str_include!("Cargo.toml");
    const _: FastStaticString = ascii_str_include!(concat!("./", "Cargo.toml"));
  }
}