stringleton_registry/
symbol.rs

1use core::{hash::Hash, ptr::NonNull};
2
3#[cfg(all(not(feature = "alloc"), feature = "std"))]
4use std as alloc;
5
6#[cfg(feature = "alloc")]
7use alloc::{borrow::Cow, string::String};
8
9use crate::Registry;
10
/// Interned string with very fast comparison and hashing.
///
/// Symbols should typically be used as extremely fast program-internal
/// identifiers.
///
/// # Comparison
///
/// Symbol comparison is just comparison between single pointers, which are
/// guaranteed to be identical for identical strings. No O(n) string comparisons
/// occur.
///
/// The implementation of `Ord` between symbols also does **not** perform string
/// comparison, but rather compares pointer values. However, the
/// `PartialOrd<str>` implementations do compare strings.
///
/// # Hashing
///
/// The hash value of a symbol is not predictable, as it depends on specific
/// pointer values, which are determined by both linking order and heap layout
/// (for dynamically created symbols). In particular, the hash value of `Symbol`
/// is **not** the same as the hash value of the underlying `str`.
///
/// For this reason, `Symbol` does not implement `Borrow<str>`, which would
/// imply that it would hash to the same value as its corresponding string
/// value. To prevent accidents, `Symbol` also does not implement `Deref<Target
/// = str>` (_this restriction may be lifted in future_).
///
/// The hash value of symbols may change even between invocations of the same
/// binary, so should not be relied upon in any way.
///
/// # Leaks
///
/// Once created, symbols are never freed, and there is no way to
/// "garbage-collect" symbols. This means that dynamically creating symbols from
/// user input or runtime data is a great way to create memory leaks. Use only
/// for static or semi-static identifiers, or otherwise trusted input.
#[derive(Clone, Copy)]
#[repr(transparent)] // Must be transparent because of `StaticSym::deref`.
// The field is a reference *to* a string reference: the address of the inner
// `&'static str` slot is the symbol's identity (see `as_ptr()`, which equality,
// ordering and hashing are built on), while the slot's contents are the text
// returned by `as_str()`.
pub struct Symbol(&'static &'static str);
50
51impl Symbol {
52    /// Create a deduplicated symbol at runtime.
53    ///
54    /// All calls to this function with the same string argument will return a
55    /// bit-identical `Symbol`.
56    ///
57    /// This function has some overhead, because it needs to take at least a
58    /// global read-lock, and potentially a write-lock if the string has not
59    /// been seen before. Additionally, opposed to
60    /// [`new_static()`](Self::new_static), this function also needs to allocate
61    /// a copy of the string on the heap and leak it.
62    ///
63    /// When the string is statically known at compile time, prefer the
64    /// [`sym!(...)`](../stringleton/macro.sym.html) macro. When the string is
65    /// statically known to live forever, prefer
66    /// [`new_static()`](Self::new_static).
67    ///
68    /// Please note that symbols are never "garbage collected", so creating an
69    /// unbounded number of symbols in this way can be considered a memory leak.
70    /// In particular, creating symbols from untrusted user input is a
71    /// denial-of-service hazard.
72    #[inline]
73    #[must_use]
74    #[cfg(feature = "alloc")]
75    pub fn new(string: impl AsRef<str>) -> Symbol {
76        Self::new_(string.as_ref())
77    }
78
79    #[inline]
80    #[must_use]
81    #[cfg(feature = "alloc")]
82    fn new_(string: &str) -> Symbol {
83        Registry::global().get_or_insert(string)
84    }
85
86    /// Create a deduplicated symbol at runtime from a static reference to a
87    /// static string.
88    ///
89    /// If the symbol has not previously been registered, this sidesteps the
90    /// need to allocate and leak the string. Using this function does not
91    /// allocate memory, outside of what is needed for registering the symbol
92    /// for subsequent lookups.
93    ///
94    /// This function has some overhead, because it needs to take at least a
95    /// global read lock, and potentially a write-lock if the string has not
96    /// been seen before.
97    ///
98    /// When the string is statically known at compile time, prefer the
99    /// [`sym!(...)`](../stringleton/macro.sym.html) macro.
100    ///
101    /// The use case for this function is the scenario when a string is only
102    /// known at runtime, but the caller wants to allocate it. For example, the
103    /// string could be part of a larger (manually leaked) allocation.
104    #[inline]
105    #[must_use]
106    pub fn new_static(string: &'static &'static str) -> Symbol {
107        Registry::global().get_or_insert_static(string)
108    }
109
110    /// Get a previously registered symbol.
111    ///
112    /// This returns `None` if the string has not previously been registered.
113    ///
114    /// This function has some overhead, because it needs to acquire a global
115    /// read-lock, but it is faster than [`Symbol::new()`] and never leaks
116    /// memory.
117    pub fn get(string: impl AsRef<str>) -> Option<Symbol> {
118        Self::get_(string.as_ref())
119    }
120
121    #[inline]
122    fn get_(string: &str) -> Option<Symbol> {
123        Registry::global().get(string)
124    }
125
126    /// New pre-interned symbol
127    ///
128    /// # Safety
129    ///
130    /// `registered_symbol` must be a globally unique string reference (i.e., it
131    /// has already been interned through the global registry).
132    ///
133    /// The only valid external usage of this function is to call it with a
134    /// value previously returned from [`Symbol::inner()`].
135    #[inline]
136    #[must_use]
137    pub unsafe fn new_unchecked(registered_symbol: &'static &'static str) -> Symbol {
138        Symbol(registered_symbol)
139    }
140
141    /// Get the string representation of this symbol.
142    ///
143    /// This operation is guaranteed to not take any locks, and is effectively
144    /// free.
145    #[inline]
146    #[must_use]
147    pub const fn as_str(&self) -> &'static str {
148        self.0
149    }
150
151    /// Get the underlying representation of this symbol.
152    #[inline]
153    #[must_use]
154    pub const fn inner(&self) -> &'static &'static str {
155        self.0
156    }
157
158    /// Get the underlying pointer value of this symbol.
159    ///
160    /// This is the basis for computing equality and hashes. Symbols
161    /// representing the same string always have the same pointer value.
162    #[inline]
163    #[must_use]
164    pub const fn as_ptr(&self) -> NonNull<&'static str> {
165        // SAFETY: Trivial. A static reference cannot be null. This unsafe block
166        // can be removed once `#[feature(non_null_from_ref)]` is stabilized.
167        unsafe { NonNull::new_unchecked(core::ptr::from_ref::<&'static str>(self.0) as *mut _) }
168    }
169
170    /// Convert the symbol to an FFI-friendly `u64`.
171    #[inline]
172    #[must_use]
173    pub fn to_ffi(&self) -> u64 {
174        self.as_ptr().as_ptr() as usize as u64
175    }
176
177    /// Reconstitute a symbol from a value previously produced by
178    /// [`to_ffi()`](Symbol::to_ffi).
179    ///
180    /// # Safety
181    ///
182    /// `value` must be produced from a previous call to `to_ffi()` in the
183    /// current process, and by the exact same version of this crate.
184    ///
185    /// In effect, this function can *only* be used for roundtrips through
186    /// foreign code.
187    #[inline]
188    #[must_use]
189    #[allow(clippy::cast_possible_truncation)] // We don't have 128-bit pointers
190    pub unsafe fn from_ffi(value: u64) -> Symbol {
191        unsafe { Self::new_unchecked(&*(value as usize as *const &'static str)) }
192    }
193
194    /// Reconstitute a symbol from a value previously produced by
195    /// [`to_ffi()`](Symbol::to_ffi), checking if it is valid.
196    ///
197    /// This involves taking a global read-lock to determine the validity of
198    /// `value`.
199    #[inline]
200    #[must_use]
201    pub fn try_from_ffi(value: u64) -> Option<Symbol> {
202        Registry::global().get_by_address(value)
203    }
204
205    /// Length of the underlying string.
206    #[inline]
207    #[must_use]
208    pub const fn len(&self) -> usize {
209        self.0.len()
210    }
211
212    /// Whether or not this is the empty symbol.
213    #[inline]
214    #[must_use]
215    pub const fn is_empty(&self) -> bool {
216        self.0.is_empty()
217    }
218}
219
220impl PartialEq for Symbol {
221    #[inline]
222    fn eq(&self, other: &Self) -> bool {
223        self.as_ptr() == other.as_ptr()
224    }
225}
226
// Marker impl: `PartialEq for Symbol` compares `as_ptr()` values, which is a
// total equivalence relation.
impl Eq for Symbol {}
228
229impl PartialEq<str> for Symbol {
230    #[inline]
231    fn eq(&self, other: &str) -> bool {
232        *self.as_str() == *other
233    }
234}
235
236impl PartialEq<&str> for Symbol {
237    #[inline]
238    fn eq(&self, other: &&str) -> bool {
239        *self.as_str() == **other
240    }
241}
242
243impl PartialEq<Symbol> for str {
244    #[inline]
245    fn eq(&self, other: &Symbol) -> bool {
246        *self == *other.as_str()
247    }
248}
249
250impl PartialEq<Symbol> for &str {
251    #[inline]
252    fn eq(&self, other: &Symbol) -> bool {
253        **self == *other.as_str()
254    }
255}
256
257impl PartialOrd for Symbol {
258    #[inline]
259    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
260        Some(self.cmp(other))
261    }
262}
263
264impl Ord for Symbol {
265    #[inline]
266    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
267        self.as_ptr().cmp(&other.as_ptr())
268    }
269}
270
271impl PartialOrd<str> for Symbol {
272    #[inline]
273    fn partial_cmp(&self, other: &str) -> Option<core::cmp::Ordering> {
274        Some(self.as_str().cmp(other))
275    }
276}
277
278impl PartialOrd<&str> for Symbol {
279    #[inline]
280    fn partial_cmp(&self, other: &&str) -> Option<core::cmp::Ordering> {
281        Some(self.as_str().cmp(*other))
282    }
283}
284
285impl PartialOrd<Symbol> for str {
286    #[inline]
287    fn partial_cmp(&self, other: &Symbol) -> Option<core::cmp::Ordering> {
288        Some(self.cmp(other.as_str()))
289    }
290}
291
292impl PartialOrd<Symbol> for &str {
293    #[inline]
294    fn partial_cmp(&self, other: &Symbol) -> Option<core::cmp::Ordering> {
295        Some((*self).cmp(other.as_str()))
296    }
297}
298
299impl Hash for Symbol {
300    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
301        self.as_ptr().hash(state);
302    }
303}
304
305impl AsRef<str> for Symbol {
306    #[inline]
307    fn as_ref(&self) -> &str {
308        self.as_str()
309    }
310}
311
/// Interns the string slice; see [`Symbol::new()`] for the cost and leak
/// caveats.
#[cfg(feature = "alloc")]
impl From<&str> for Symbol {
    #[inline]
    fn from(text: &str) -> Self {
        // `Symbol::new` would only call `as_ref()` (an identity for `&str`)
        // before delegating here.
        Symbol::new_(text)
    }
}
319
/// Interns the contents of an owned string; the `String` itself is dropped
/// afterwards.
#[cfg(feature = "alloc")]
impl From<String> for Symbol {
    #[inline]
    fn from(owned: String) -> Self {
        Symbol::new(owned.as_str())
    }
}
327
/// Interns the contents of a borrowed-or-owned string.
#[cfg(feature = "alloc")]
impl<'a> From<Cow<'a, str>> for Symbol {
    fn from(text: Cow<'a, str>) -> Self {
        let borrowed: &str = &text;
        Symbol::new(borrowed)
    }
}
334
335/// Note: This impl forwards string formatting options to the underlying string.
336impl core::fmt::Display for Symbol {
337    #[inline]
338    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
339        core::fmt::Display::fmt(self.as_str(), f)
340    }
341}
342
343/// Note: This impl forwards string formatting options to the underlying string.
344impl core::fmt::Debug for Symbol {
345    #[inline]
346    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
347        core::fmt::Debug::fmt(self.as_str(), f)
348    }
349}
350
// Serde support. The impls live in an anonymous `const` so nothing leaks into
// the module namespace. Trait methods are invoked either through the bounded
// `serializer` value or with fully qualified paths, so this block does not
// depend on `serde::Serialize` / `serde::Deserialize` being imported.
#[cfg(feature = "serde")]
const _: () = {
    /// Serializes the symbol as a plain string.
    impl serde::Serialize for Symbol {
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            serializer.serialize_str(self.as_str())
        }
    }

    /// Deserializes a string and interns it with [`Symbol::new()`], so the
    /// leak caveats of `Symbol::new()` apply to deserialized input as well.
    #[cfg(feature = "alloc")]
    impl<'de> serde::Deserialize<'de> for Symbol {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            let text = <Cow<'de, str> as serde::Deserialize<'de>>::deserialize(deserializer)?;
            Ok(Symbol::new(&*text))
        }
    }

    /// Deserializes a borrowed string and resolves it to an *already
    /// registered* symbol.
    ///
    /// `Symbol::new()` is only available with the `alloc` feature, so without
    /// it a new symbol cannot be interned from non-static input. Unknown
    /// strings are reported as a deserialization error instead.
    #[cfg(not(feature = "alloc"))]
    impl<'de> serde::Deserialize<'de> for Symbol {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            let text = <&'de str as serde::Deserialize<'de>>::deserialize(deserializer)?;
            Symbol::get(text).ok_or_else(|| {
                <D::Error as serde::de::Error>::custom(
                    "string is not a registered symbol (interning new symbols requires the `alloc` feature)",
                )
            })
        }
    }
};