turbo_rcstr/
lib.rs

1use std::{
2    borrow::{Borrow, Cow},
3    ffi::OsStr,
4    fmt::{Debug, Display},
5    hash::{Hash, Hasher},
6    mem::forget,
7    num::NonZeroU8,
8    ops::Deref,
9    path::{Path, PathBuf},
10};
11
12use debug_unreachable::debug_unreachable;
13use serde::{Deserialize, Deserializer, Serialize, Serializer};
14use triomphe::Arc;
15use turbo_tasks_hash::{DeterministicHash, DeterministicHasher};
16
17use crate::{dynamic::new_atom, tagged_value::TaggedValue};
18
19mod dynamic;
20mod tagged_value;
21
22/// An immutable reference counted [`String`], similar to [`Arc<String>`][std::sync::Arc].
23///
24/// This is the preferred immutable string type for [`turbo_task::function`][macro@crate::function]
25/// arguments and inside of [`turbo_task::value`][macro@crate::value].
26///
27/// As turbo-tasks must store copies of function arguments to enable caching, non-reference counted
28/// [`String`]s would incur frequent cloning. Reference counting typically decreases memory
29/// consumption and CPU time in these cases.
30///
31/// ## Conversion
32///
/// Converting a `String` or `&str` to an `RcStr` can be performed using `.into()` or
34/// `RcStr::from(...)`:
35///
36/// ```
37/// # use turbo_rcstr::RcStr;
38/// #
39/// let s = "foo";
40/// let rc_s1: RcStr = s.into();
41/// let rc_s2 = RcStr::from(s);
42/// assert_eq!(rc_s1, rc_s2);
43/// ```
44///
45/// Converting from an [`RcStr`] to a `&str` should be done with [`RcStr::as_str`]. Converting to a
46/// `String` should be done with [`RcStr::into_owned`].
47///
48/// ## Future Optimizations
49///
50/// This type is intentionally opaque to allow for optimizations to the underlying representation.
51/// Future implementations may use inline representations or interning.
52//
53// If you want to change the underlying string type to `Arc<str>`, please ensure that you profile
54// performance. The current implementation offers very cheap `String -> RcStr -> String`, meaning we
55// only pay for the allocation for `Arc` when we pass `format!("").into()` to a function.
pub struct RcStr {
    // Tagged payload. The low two bits of its tag byte (`TAG_MASK`) select the representation:
    // `DYNAMIC_TAG` for a reference-counted heap string, or `INLINE_TAG` for bytes stored inline
    // with their length kept in the upper nibble of the tag byte (`LEN_MASK`). The value is only
    // meaningful when decoded by the `unsafe` accessors in this crate.
    unsafe_data: TaggedValue,
}
59
// SAFETY — NOTE(review): this relies on invariants of the `dynamic` and `tagged_value` modules,
// which are not visible here; confirm against them. An `RcStr` is either an inline byte payload
// or a handle to a `triomphe::Arc<String>` (see `into_owned` and `Drop`), and this file offers no
// way to mutate the string contents through a shared `&RcStr`.
unsafe impl Send for RcStr {}
unsafe impl Sync for RcStr {}
62
/// Tag value (low two bits of the tag byte) marking a heap-allocated, reference-counted string.
const DYNAMIC_TAG: u8 = 0b_00;
/// Tag value (low two bits of the tag byte) marking an inline string; the length is stored in
/// the upper nibble of the same byte.
const INLINE_TAG: u8 = 0b_01; // len in upper nybble
/// [`INLINE_TAG`] as a [`NonZeroU8`]. Built with the checked constructor so the non-zero
/// invariant is verified at compile time instead of being asserted through `unsafe`.
const INLINE_TAG_INIT: NonZeroU8 = match NonZeroU8::new(INLINE_TAG) {
    Some(tag) => tag,
    None => panic!("INLINE_TAG must be non-zero"),
};
/// Mask selecting the representation tag from the tag byte.
const TAG_MASK: u8 = 0b_11;
/// Number of bits the inline length is shifted left by inside the tag byte.
const LEN_OFFSET: usize = 4;
/// Mask selecting the inline length (upper nibble) from the tag byte.
const LEN_MASK: u8 = 0xf0;
69
70impl RcStr {
71    #[inline(always)]
72    fn tag(&self) -> u8 {
73        self.unsafe_data.tag() & TAG_MASK
74    }
75
76    #[inline(never)]
77    pub fn as_str(&self) -> &str {
78        match self.tag() {
79            DYNAMIC_TAG => unsafe { dynamic::deref_from(self.unsafe_data) },
80            INLINE_TAG => {
81                let len = (self.unsafe_data.tag() & LEN_MASK) >> LEN_OFFSET;
82                let src = self.unsafe_data.data();
83                unsafe { std::str::from_utf8_unchecked(&src[..(len as usize)]) }
84            }
85            _ => unsafe { debug_unreachable!() },
86        }
87    }
88
89    /// Returns an owned mutable [`String`].
90    ///
91    /// This implementation is more efficient than [`ToString::to_string`]:
92    ///
93    /// - If the reference count is 1, the `Arc` can be unwrapped, giving ownership of the
94    ///   underlying string without cloning in `O(1)` time.
95    /// - This avoids some of the potential overhead of the `Display` trait.
96    pub fn into_owned(self) -> String {
97        match self.tag() {
98            DYNAMIC_TAG => {
99                let arc = unsafe { dynamic::restore_arc(self.unsafe_data) };
100
101                match Arc::try_unwrap(arc.clone()) {
102                    Ok(v) => v,
103                    Err(arc) => {
104                        let s = arc.to_string();
105                        forget(arc);
106                        s
107                    }
108                }
109            }
110            INLINE_TAG => self.as_str().to_string(),
111            _ => unsafe { debug_unreachable!() },
112        }
113    }
114
115    pub fn map(self, f: impl FnOnce(String) -> String) -> Self {
116        RcStr::from(Cow::Owned(f(self.into_owned())))
117    }
118
119    #[inline]
120    pub(crate) fn from_alias(alias: TaggedValue) -> Self {
121        if alias.tag() & TAG_MASK == DYNAMIC_TAG {
122            unsafe {
123                let arc = dynamic::restore_arc(alias);
124                forget(arc.clone());
125                forget(arc);
126            }
127        }
128
129        Self { unsafe_data: alias }
130    }
131}
132
133impl DeterministicHash for RcStr {
134    fn deterministic_hash<H: DeterministicHasher>(&self, state: &mut H) {
135        state.write_usize(self.len());
136        state.write_bytes(self.as_bytes());
137    }
138}
139
140impl Deref for RcStr {
141    type Target = str;
142
143    fn deref(&self) -> &Self::Target {
144        self.as_str()
145    }
146}
147
148impl Borrow<str> for RcStr {
149    fn borrow(&self) -> &str {
150        self.as_str()
151    }
152}
153
154impl From<Arc<String>> for RcStr {
155    fn from(s: Arc<String>) -> Self {
156        match Arc::try_unwrap(s) {
157            Ok(v) => new_atom(Cow::Owned(v)),
158            Err(arc) => new_atom(Cow::Borrowed(&**arc)),
159        }
160    }
161}
162
163impl From<String> for RcStr {
164    fn from(s: String) -> Self {
165        new_atom(Cow::Owned(s))
166    }
167}
168
169impl From<&'_ str> for RcStr {
170    fn from(s: &str) -> Self {
171        new_atom(Cow::Borrowed(s))
172    }
173}
174
175impl From<Cow<'_, str>> for RcStr {
176    fn from(s: Cow<str>) -> Self {
177        new_atom(s)
178    }
179}
180
181/// Mimic `&str`
182impl AsRef<Path> for RcStr {
183    fn as_ref(&self) -> &Path {
184        self.as_str().as_ref()
185    }
186}
187
188/// Mimic `&str`
189impl AsRef<OsStr> for RcStr {
190    fn as_ref(&self) -> &OsStr {
191        self.as_str().as_ref()
192    }
193}
194
195/// Mimic `&str`
196impl AsRef<[u8]> for RcStr {
197    fn as_ref(&self) -> &[u8] {
198        self.as_str().as_ref()
199    }
200}
201
202impl PartialEq<str> for RcStr {
203    fn eq(&self, other: &str) -> bool {
204        self.as_str() == other
205    }
206}
207
208impl PartialEq<&'_ str> for RcStr {
209    fn eq(&self, other: &&str) -> bool {
210        self.as_str() == *other
211    }
212}
213
214impl PartialEq<String> for RcStr {
215    fn eq(&self, other: &String) -> bool {
216        self.as_str() == other.as_str()
217    }
218}
219
220impl Debug for RcStr {
221    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
222        Debug::fmt(&self.as_str(), f)
223    }
224}
225
226impl Display for RcStr {
227    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
228        Display::fmt(&self.as_str(), f)
229    }
230}
231
232impl From<RcStr> for String {
233    fn from(s: RcStr) -> Self {
234        s.into_owned()
235    }
236}
237
238impl From<RcStr> for PathBuf {
239    fn from(s: RcStr) -> Self {
240        String::from(s).into()
241    }
242}
243
244impl Clone for RcStr {
245    #[inline(always)]
246    fn clone(&self) -> Self {
247        Self::from_alias(self.unsafe_data)
248    }
249}
250
251impl Default for RcStr {
252    fn default() -> Self {
253        RcStr::from("")
254    }
255}
256
257impl PartialEq for RcStr {
258    fn eq(&self, other: &Self) -> bool {
259        self.as_str() == other.as_str()
260    }
261}
262
263impl Eq for RcStr {}
264
265impl PartialOrd for RcStr {
266    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
267        Some(self.cmp(other))
268    }
269}
270
271impl Ord for RcStr {
272    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
273        self.as_str().cmp(other.as_str())
274    }
275}
276
277impl Hash for RcStr {
278    fn hash<H: Hasher>(&self, state: &mut H) {
279        self.as_str().hash(state);
280    }
281}
282
283impl Serialize for RcStr {
284    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
285        serializer.serialize_str(self.as_str())
286    }
287}
288
289impl<'de> Deserialize<'de> for RcStr {
290    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
291        let s = String::deserialize(deserializer)?;
292        Ok(RcStr::from(s))
293    }
294}
295
296impl Drop for RcStr {
297    fn drop(&mut self) {
298        if self.tag() == DYNAMIC_TAG {
299            unsafe { drop(dynamic::restore_arc(self.unsafe_data)) }
300        }
301    }
302}