stagiaire/
lib.rs

1//! A string interner.
2//!
3//! A string interner stores a pool of immutable strings keeping a single copy
4//! of each string value.  A [`Symbol`] is a wrapper over a pointer to
5//! one of these unique string values.  Symbols can be compared quickly (pointer
6//! rather than string comparisons) and are cheaper to store than strings when
7//! several occurrences of a given string exist.
8//!
9//! # Examples
10//!
11//! ```
12//! use stagiaire::Symbol;
13//!
14//! // Create a new symbol.
15//! let a_foo = Symbol::new("foo");
16//! assert_eq!(a_foo.as_str(), "foo");
17//!
18//! // Create another symbol that refers to an existing value.
19//! let another_foo = Symbol::new("foo");
20//! assert_eq!(a_foo, another_foo);
21//!
22//! // Both symbols point to the same underlying value.
23//! assert_eq!(a_foo.as_str().as_ptr(), another_foo.as_str().as_ptr());
24//!
25//! // A symbol has the same size as a reference.
26//! assert_eq!(std::mem::size_of::<Symbol>(), std::mem::size_of::<&str>());
27//!
28//! // Symbols pointing to different values are not equal.
29//! let a_bar = Symbol::new("bar");
30//! assert_ne!(a_bar, a_foo);
31//! ```
32//!
33//! # Lifetime
34//!
//! The interner is a process-wide singleton that is not exposed
//! programmatically.  String values stored in it persist until the owning
//! process terminates and therefore have a `'static` lifetime.
38//!
39//! # Thread-safety
40//!
41//! [`Symbol`] values can be created and accessed from multiple threads.
42//!
43//! # Serialization
44//!
45//! A [`Symbol`] can optionally be serialized and deserialized using
46//! [serde](https://crates.io/crates/serde).  To enable this, build with the
47//! `serde` feature on.
48//!
49//! [`Symbol`]: struct.Symbol.html
50
use std::collections::HashSet;
use std::fmt::Display;
use std::hash::{Hash, Hasher};
use std::sync::Mutex;

use lazy_static::lazy_static;
#[cfg(feature = "serde")]
use serde::{Deserialize, Deserializer, Serialize, Serializer};
59
/// A cheap, copyable handle to a string stored in the interner.
///
/// Two symbols compare equal exactly when they point to the same interned
/// value.  See the crate-level documentation for examples and details.
#[derive(Clone, Copy, Debug)]
pub struct Symbol {
    /// The interned string this symbol points to.
    inner: &'static str,
}
67
68impl Symbol {
69    /// Inserts in the pool the value `s` if it is no already there and returns
70    /// a symbol pointing to this new value or the existing one.
71    pub fn new<R: AsRef<str>>(s: R) -> Symbol {
72        Symbol {
73            inner: intern(s.as_ref()),
74        }
75    }
76
77    /// Returns a reference to the string pointed to by this symbol.
78    pub fn as_str(&self) -> &'static str {
79        self.inner
80    }
81}
82
83impl From<&str> for Symbol {
84    /// Generates a symbol for `source`.
85    fn from(source: &str) -> Self {
86        Symbol::new(source)
87    }
88}
89
90impl PartialEq for Symbol {
91    fn eq(&self, other: &Symbol) -> bool {
92        self.inner.as_ptr() == other.inner.as_ptr()
93    }
94}
95
96impl Eq for Symbol {}
97
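// Implement mixed comparisons between `Symbol` and string slices.
// Code lifted from the `String` implementation.
// The `&str` versions are required in addition to the `str` ones because
// `==` does not auto-dereference its operands: in `symbol == "foo"` the
// right-hand side has type `&str`, so `PartialEq<&str>` must exist.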
102
103impl PartialEq<str> for Symbol {
104    fn eq(&self, other: &str) -> bool {
105        self.inner[..] == other[..]
106    }
107}
108
109impl PartialEq<Symbol> for str {
110    fn eq(&self, other: &Symbol) -> bool {
111        self[..] == other.inner[..]
112    }
113}
114
115impl<'a> PartialEq<Symbol> for &'a str {
116    fn eq(&self, other: &Symbol) -> bool {
117        self[..] == other.inner[..]
118    }
119}
120
121impl<'a> PartialEq<&'a str> for Symbol {
122    fn eq(&self, other: &&'a str) -> bool {
123        self.inner[..] == other[..]
124    }
125}
126
127impl Hash for Symbol {
128    /// Returns a hash of the pointer wrapped by this symbol (rather than the
129    /// pointed-to string content).
130    fn hash<H: Hasher>(&self, state: &mut H) {
131        self.inner.as_ptr().hash(state);
132    }
133}
134
135impl Display for Symbol {
136    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
137        write!(f, "{}", self.as_str())
138    }
139}
140
/// Serializes the symbol as a plain string.
///
/// Only compiled when the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl Serialize for Symbol {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_str(self.inner)
    }
}
150
/// Deserializes a string and interns it.
///
/// Only compiled when the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for Symbol {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        // An owned `String` is requested from the deserializer; the interner
        // then keeps its own copy, so the temporary is dropped afterwards.
        let s: String = Deserialize::deserialize(deserializer)?;
        Ok(Symbol::new(s))
    }
}
161
162lazy_static! {
163    // All strings interned so far.
164    static ref STRINGS : Mutex<HashSet<&'static str>> = {
165        Mutex::new(HashSet::new())
166    };
167}
168
169// Returns a reference to a string that has the same value as `s` and is guaranteed to be unique.
170fn intern(str: &str) -> &'static str {
171    let mut g = STRINGS.lock().unwrap();
172    // TODO: Use HashSet::get_or_insert() when stabilized
173    match g.get(str) {
174        Some(s) => s,
175        None => {
176            let b = Box::new(str.to_string());
177            let s = Box::leak(b).as_str();
178            g.insert(s);
179            s
180        }
181    }
182}