simple_symbol/
lib.rs

1use std::cmp;
2use std::collections::HashMap;
3use std::fmt;
4use std::str;
5use std::sync;
6
7use once_cell::sync::Lazy;
8
9/// Global cache of interned strings
10static INTERNER: Lazy<sync::RwLock<Interner>> = Lazy::new(|| Default::default());
11
/// A naive string interner.
///
/// Every distinct string is stored once, so memory use is O(n) in the
/// number of unique strings. In exchange, symbols can be compared for
/// equality in O(1) and hashed cheaply.
///
/// Interned strings are **never** garbage collected: their memory is
/// deliberately leaked and lives until the program exits. This makes the
/// interner suitable only for short-lived processes (e.g. compilers).
#[derive(Default, Debug)]
pub struct Interner {
    /// Maps each interned string to its position in `store`.
    index: HashMap<&'static str, usize>,
    /// Interned strings, addressed by the index held in a `Symbol`.
    store: Vec<&'static str>,
}
25
26/// Represents a unique string.
27///
28/// Only the same `Interner` that produced a `Symbol` can be used
29/// to resolve it to a string again.
30///
31/// # Example
32///
33/// ```rust
34/// use simple_symbol::{intern, resolve};
35///
36/// pub fn main() {
37///     let a = intern("A");
38///     let b = intern("A");
39///
40///     assert_eq!(a, a);
41///
42///     let c = intern("B");
43///
44///     assert_ne!(a, c);
45///     assert_ne!(b, c);
46///
47///     // Prints "A"
48///     println!("{}", a);
49///
50///     let str_a = resolve(a);
51///
52///     assert_eq!(str_a, "A");
53/// }
54/// ```
55#[derive(Copy, Clone, Eq, Hash, Ord, PartialEq)]
56pub struct Symbol(usize);
57
58impl Interner {
59    /// Store `string` in this interner if not already cached.
60    fn intern<S: AsRef<str>>(&mut self, string: S) -> Symbol {
61        let string = string.as_ref();
62        match self.index.get(string) {
63        | Some(&index) => Symbol(index),
64        | None => {
65            let owned = string.to_owned().into_boxed_str();
66            let leaked = Box::leak(owned);
67            let index = self.store.len();
68            self.store.push(leaked);
69            self.index.insert(leaked, index);
70            Symbol(index)
71        }
72        }
73    }
74
75    /// Store static `string` (without leaking memory) in this interner if
76    /// not already cached.
77    fn intern_static(&mut self, string: &'static str) -> Symbol {
78        match self.index.get(string) {
79        | Some(&index) => Symbol(index),
80        | None => {
81            let index = self.store.len();
82            self.store.push(string);
83            self.index.insert(string, index);
84            Symbol(index)
85        }
86        }
87    }
88
89    /// Resolve `symbol` in this interner.
90    ///
91    /// Panics if `symbol` was produced by this interner.
92    fn resolve(&self, symbol: Symbol) -> &'static str {
93        self.store[symbol.0]
94    }
95}
96
97/// Look up `string` in the global cache, and insert it if missing.
98pub fn intern<S: AsRef<str>>(string: S) -> Symbol {
99    INTERNER.write()
100        .expect("[INTERNAL ERROR]: poisoned global interner lock")
101        .intern(string)
102}
103
104/// Look up static `string` in the global cache, and insert it without allocating
105/// if it is missing.
106pub fn intern_static(string: &'static str) -> Symbol {
107    INTERNER.write()
108        .expect("[INTERNAL ERROR]: poisoned global interner lock")
109        .intern_static(string)
110}
111
112/// Resolve `symbol` to its string representation.
113pub fn resolve(symbol: Symbol) -> &'static str {
114    INTERNER.read()
115        .expect("[INTERNAL ERROR]: poisoned global interner lock")
116        .resolve(symbol)
117}
118
119impl From<Symbol> for &'static str {
120    fn from(symbol: Symbol) -> Self {
121        resolve(symbol)
122    }
123}
124
125impl str::FromStr for Symbol {
126    type Err = ();
127    fn from_str(s: &str) -> Result<Self, Self::Err> {
128        Ok(intern(s))
129    }
130}
131
132impl cmp::PartialOrd for Symbol {
133    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
134        resolve(*self).partial_cmp(resolve(*other))
135    }
136}
137
138impl fmt::Debug for Symbol {
139    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
140        write!(fmt, "{:?}", resolve(*self))
141    }
142}
143
144impl fmt::Display for Symbol {
145    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
146        write!(fmt, "{}", resolve(*self))
147    }
148}
149
#[cfg(test)]
mod tests {
    use super::{intern, resolve};

    /// Interning the same text twice yields equal symbols.
    #[test]
    fn test_same() {
        assert_eq!(intern("String"), intern("String"));
    }

    /// Distinct texts yield distinct symbols.
    #[test]
    fn test_different() {
        assert_ne!(intern("StringA"), intern("StringB"));
    }

    /// Interning is case-sensitive.
    #[test]
    fn test_case() {
        let upper = intern("String");
        let lower = intern("string");
        assert_ne!(upper, lower);
    }

    /// A symbol resolves back to the text it was interned from.
    #[test]
    fn test_resolve() {
        let resolved: &'static str = resolve(intern("abcd"));
        assert_eq!("abcd", resolved);
    }

    /// `Debug` output matches the `Debug` output of the underlying string.
    #[test]
    fn test_debug() {
        let rendered = format!("{:?}", intern("Debug"));
        assert_eq!(rendered, format!("{:?}", "Debug"));
    }

    /// `Display` output matches the `Display` output of the underlying string.
    #[test]
    fn test_display() {
        let rendered = format!("{}", intern("Display"));
        assert_eq!(rendered, format!("{}", "Display"));
    }

    /// The comparison operators follow string order.
    #[test]
    fn test_cmp() {
        // Interned out of alphabetical order on purpose, so that interning
        // order alone cannot make the assertions pass.
        let y = intern("y");
        let z = intern("z");
        let x = intern("x");
        assert!(x < y);
        assert!(y < z);
        assert!(x < z);
    }
}