stringleton_registry/symbol.rs
1use core::{hash::Hash, ptr::NonNull};
2
3#[cfg(all(not(feature = "alloc"), feature = "std"))]
4use std as alloc;
5
6#[cfg(feature = "alloc")]
7use alloc::{borrow::Cow, string::String};
8
9use crate::Registry;
10
/// An interned string whose comparison and hashing are extremely cheap.
///
/// Symbols are intended to serve as very fast identifiers that live inside
/// the program.
///
/// # Comparison
///
/// Comparing two symbols only compares a single pointer each. Identical
/// strings are guaranteed to yield identical pointers, so no O(n) string
/// comparison ever takes place.
///
/// `Ord` between two symbols likewise does **not** look at the string
/// contents; it orders by pointer value. The `PartialOrd<str>`
/// implementations, on the other hand, do compare string contents.
///
/// # Hashing
///
/// A symbol's hash value is unpredictable because it is derived from a
/// pointer value, which in turn depends on linking order and on heap layout
/// (for symbols created dynamically). In particular, a `Symbol` does **not**
/// hash to the same value as the `str` it represents.
///
/// That is why `Symbol` does not implement `Borrow<str>`: doing so would
/// promise that it hashes like its string value. To avoid accidents it also
/// does not implement `Deref<Target = str>` (_this restriction may be lifted
/// in future_).
///
/// Hash values may differ even between two runs of the same binary, so never
/// rely on them in any way.
///
/// # Leaks
///
/// A symbol is never freed once it exists, and there is no way to
/// "garbage-collect" symbols. Creating symbols dynamically from user input or
/// runtime data is therefore an effective way to leak memory. Restrict their
/// use to static or semi-static identifiers, or to otherwise trusted input.
#[derive(Copy, Clone)]
#[repr(transparent)] // Must be transparent because of `StaticSym::deref`.
pub struct Symbol(&'static &'static str);
50
51impl Symbol {
52 /// Create a deduplicated symbol at runtime.
53 ///
54 /// All calls to this function with the same string argument will return a
55 /// bit-identical `Symbol`.
56 ///
57 /// This function has some overhead, because it needs to take at least a
58 /// global read-lock, and potentially a write-lock if the string has not
59 /// been seen before. Additionally, opposed to
60 /// [`new_static()`](Self::new_static), this function also needs to allocate
61 /// a copy of the string on the heap and leak it.
62 ///
63 /// When the string is statically known at compile time, prefer the
64 /// [`sym!(...)`](../stringleton/macro.sym.html) macro. When the string is
65 /// statically known to live forever, prefer
66 /// [`new_static()`](Self::new_static).
67 ///
68 /// Please note that symbols are never "garbage collected", so creating an
69 /// unbounded number of symbols in this way can be considered a memory leak.
70 /// In particular, creating symbols from untrusted user input is a
71 /// denial-of-service hazard.
72 #[inline]
73 #[must_use]
74 #[cfg(feature = "alloc")]
75 pub fn new(string: impl AsRef<str>) -> Symbol {
76 Self::new_(string.as_ref())
77 }
78
79 #[inline]
80 #[must_use]
81 #[cfg(feature = "alloc")]
82 fn new_(string: &str) -> Symbol {
83 Registry::global().get_or_insert(string)
84 }
85
86 /// Create a deduplicated symbol at runtime from a static reference to a
87 /// static string.
88 ///
89 /// If the symbol has not previously been registered, this sidesteps the
90 /// need to allocate and leak the string. Using this function does not
91 /// allocate memory, outside of what is needed for registering the symbol
92 /// for subsequent lookups.
93 ///
94 /// This function has some overhead, because it needs to take at least a
95 /// global read lock, and potentially a write-lock if the string has not
96 /// been seen before.
97 ///
98 /// When the string is statically known at compile time, prefer the
99 /// [`sym!(...)`](../stringleton/macro.sym.html) macro.
100 ///
101 /// The use case for this function is the scenario when a string is only
102 /// known at runtime, but the caller wants to allocate it. For example, the
103 /// string could be part of a larger (manually leaked) allocation.
104 #[inline]
105 #[must_use]
106 pub fn new_static(string: &'static &'static str) -> Symbol {
107 Registry::global().get_or_insert_static(string)
108 }
109
110 /// Get a previously registered symbol.
111 ///
112 /// This returns `None` if the string has not previously been registered.
113 ///
114 /// This function has some overhead, because it needs to acquire a global
115 /// read-lock, but it is faster than [`Symbol::new()`] and never leaks
116 /// memory.
117 pub fn get(string: impl AsRef<str>) -> Option<Symbol> {
118 Self::get_(string.as_ref())
119 }
120
121 #[inline]
122 fn get_(string: &str) -> Option<Symbol> {
123 Registry::global().get(string)
124 }
125
126 /// New pre-interned symbol
127 ///
128 /// # Safety
129 ///
130 /// `registered_symbol` must be a globally unique string reference (i.e., it
131 /// has already been interned through the global registry).
132 ///
133 /// The only valid external usage of this function is to call it with a
134 /// value previously returned from [`Symbol::inner()`].
135 #[inline]
136 #[must_use]
137 pub unsafe fn new_unchecked(registered_symbol: &'static &'static str) -> Symbol {
138 Symbol(registered_symbol)
139 }
140
141 /// Get the string representation of this symbol.
142 ///
143 /// This operation is guaranteed to not take any locks, and is effectively
144 /// free.
145 #[inline]
146 #[must_use]
147 pub const fn as_str(&self) -> &'static str {
148 self.0
149 }
150
151 /// Get the underlying representation of this symbol.
152 #[inline]
153 #[must_use]
154 pub const fn inner(&self) -> &'static &'static str {
155 self.0
156 }
157
158 /// Get the underlying pointer value of this symbol.
159 ///
160 /// This is the basis for computing equality and hashes. Symbols
161 /// representing the same string always have the same pointer value.
162 #[inline]
163 #[must_use]
164 pub const fn as_ptr(&self) -> NonNull<&'static str> {
165 // SAFETY: Trivial. A static reference cannot be null. This unsafe block
166 // can be removed once `#[feature(non_null_from_ref)]` is stabilized.
167 unsafe { NonNull::new_unchecked(core::ptr::from_ref::<&'static str>(self.0) as *mut _) }
168 }
169
170 /// Convert the symbol to an FFI-friendly `u64`.
171 #[inline]
172 #[must_use]
173 pub fn to_ffi(&self) -> u64 {
174 self.as_ptr().as_ptr() as usize as u64
175 }
176
177 /// Reconstitute a symbol from a value previously produced by
178 /// [`to_ffi()`](Symbol::to_ffi).
179 ///
180 /// # Safety
181 ///
182 /// `value` must be produced from a previous call to `to_ffi()` in the
183 /// current process, and by the exact same version of this crate.
184 ///
185 /// In effect, this function can *only* be used for roundtrips through
186 /// foreign code.
187 #[inline]
188 #[must_use]
189 #[allow(clippy::cast_possible_truncation)] // We don't have 128-bit pointers
190 pub unsafe fn from_ffi(value: u64) -> Symbol {
191 unsafe { Self::new_unchecked(&*(value as usize as *const &'static str)) }
192 }
193
194 /// Reconstitute a symbol from a value previously produced by
195 /// [`to_ffi()`](Symbol::to_ffi), checking if it is valid.
196 ///
197 /// This involves taking a global read-lock to determine the validity of
198 /// `value`.
199 #[inline]
200 #[must_use]
201 pub fn try_from_ffi(value: u64) -> Option<Symbol> {
202 Registry::global().get_by_address(value)
203 }
204
205 /// Length of the underlying string.
206 #[inline]
207 #[must_use]
208 pub const fn len(&self) -> usize {
209 self.0.len()
210 }
211
212 /// Whether or not this is the empty symbol.
213 #[inline]
214 #[must_use]
215 pub const fn is_empty(&self) -> bool {
216 self.0.is_empty()
217 }
218}
219
220impl PartialEq for Symbol {
221 #[inline]
222 fn eq(&self, other: &Self) -> bool {
223 self.as_ptr() == other.as_ptr()
224 }
225}
226
227impl Eq for Symbol {}
228
229impl PartialEq<str> for Symbol {
230 #[inline]
231 fn eq(&self, other: &str) -> bool {
232 *self.as_str() == *other
233 }
234}
235
236impl PartialEq<&str> for Symbol {
237 #[inline]
238 fn eq(&self, other: &&str) -> bool {
239 *self.as_str() == **other
240 }
241}
242
243impl PartialEq<Symbol> for str {
244 #[inline]
245 fn eq(&self, other: &Symbol) -> bool {
246 *self == *other.as_str()
247 }
248}
249
250impl PartialEq<Symbol> for &str {
251 #[inline]
252 fn eq(&self, other: &Symbol) -> bool {
253 **self == *other.as_str()
254 }
255}
256
257impl PartialOrd for Symbol {
258 #[inline]
259 fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
260 Some(self.cmp(other))
261 }
262}
263
264impl Ord for Symbol {
265 #[inline]
266 fn cmp(&self, other: &Self) -> core::cmp::Ordering {
267 self.as_ptr().cmp(&other.as_ptr())
268 }
269}
270
271impl PartialOrd<str> for Symbol {
272 #[inline]
273 fn partial_cmp(&self, other: &str) -> Option<core::cmp::Ordering> {
274 Some(self.as_str().cmp(other))
275 }
276}
277
278impl PartialOrd<&str> for Symbol {
279 #[inline]
280 fn partial_cmp(&self, other: &&str) -> Option<core::cmp::Ordering> {
281 Some(self.as_str().cmp(*other))
282 }
283}
284
285impl PartialOrd<Symbol> for str {
286 #[inline]
287 fn partial_cmp(&self, other: &Symbol) -> Option<core::cmp::Ordering> {
288 Some(self.cmp(other.as_str()))
289 }
290}
291
292impl PartialOrd<Symbol> for &str {
293 #[inline]
294 fn partial_cmp(&self, other: &Symbol) -> Option<core::cmp::Ordering> {
295 Some((*self).cmp(other.as_str()))
296 }
297}
298
299impl Hash for Symbol {
300 fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
301 self.as_ptr().hash(state);
302 }
303}
304
305impl AsRef<str> for Symbol {
306 #[inline]
307 fn as_ref(&self) -> &str {
308 self.as_str()
309 }
310}
311
/// Interns the string through [`Symbol::new()`].
#[cfg(feature = "alloc")]
impl From<&str> for Symbol {
    #[inline]
    fn from(value: &str) -> Self {
        Self::new(value)
    }
}
319
/// Interns the string's contents through [`Symbol::new()`]; the `String`
/// itself is dropped afterwards.
#[cfg(feature = "alloc")]
impl From<String> for Symbol {
    #[inline]
    fn from(value: String) -> Self {
        Self::new(value.as_str())
    }
}
327
/// Interns the borrowed or owned string contents through [`Symbol::new()`].
#[cfg(feature = "alloc")]
impl<'a> From<Cow<'a, str>> for Symbol {
    fn from(value: Cow<'a, str>) -> Self {
        let text: &str = &value;
        Self::new(text)
    }
}
334
335/// Note: This impl forwards string formatting options to the underlying string.
336impl core::fmt::Display for Symbol {
337 #[inline]
338 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
339 core::fmt::Display::fmt(self.as_str(), f)
340 }
341}
342
343/// Note: This impl forwards string formatting options to the underlying string.
344impl core::fmt::Debug for Symbol {
345 #[inline]
346 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
347 core::fmt::Debug::fmt(self.as_str(), f)
348 }
349}
350
#[cfg(feature = "serde")]
const _: () = {
    /// A symbol serializes as its plain string contents.
    impl serde::Serialize for Symbol {
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            // Fully-qualified call so this does not depend on
            // `serde::Serialize` having been imported into scope.
            serde::Serialize::serialize(self.as_str(), serializer)
        }
    }

    /// With `alloc`, any string can be deserialized: it is interned through
    /// [`Symbol::new()`], which registers it if it is not already known.
    #[cfg(feature = "alloc")]
    impl<'de> serde::Deserialize<'de> for Symbol {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            let text = <Cow<'de, str> as serde::Deserialize<'de>>::deserialize(deserializer)?;
            Ok(Symbol::new(&*text))
        }
    }

    /// Without `alloc`, [`Symbol::new()`] does not exist, and a string
    /// borrowed for `'de` cannot be handed to [`Symbol::new_static()`].
    /// Deserialization is therefore limited to strings that are already
    /// registered; any other string is reported as an invalid value.
    #[cfg(not(feature = "alloc"))]
    impl<'de> serde::Deserialize<'de> for Symbol {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            let text = <&'de str as serde::Deserialize<'de>>::deserialize(deserializer)?;
            Symbol::get(text).ok_or_else(|| {
                <D::Error as serde::de::Error>::invalid_value(
                    serde::de::Unexpected::Str(text),
                    &"a string that is already registered as a symbol",
                )
            })
        }
    }
};