1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
use std::cmp;
use std::collections::HashMap;
use std::fmt;
use std::str;
use std::sync;

use once_cell::sync::Lazy;

/// Global cache of interned strings
static INTERNER: Lazy<sync::RwLock<Interner>> = Lazy::new(|| Default::default());

/// Implements naive string internment.
///
/// Requires O(n) heap space to store unique strings, in
/// return for O(1) symbol equality checks and faster symbol hashing.
///
/// Does **NOT** garbage collect interned strings: the memory
/// is intentionally leaked for the duration of the program.
/// This is only suitable for short-lived processes (e.g. compilers).
#[derive(Debug, Default)]
pub struct Interner {
    index: HashMap<&'static str, usize>,
    store: Vec<&'static str>,
}

/// Represents a unique string.
///
/// Only the same `Interner` that produced a `Symbol` can be used
/// to resolve it to a string again.
///
/// # Example
///
/// ```rust
/// use simple_symbol::{intern, resolve};
///
/// pub fn main() {
///     let a = intern("A");
///     let b = intern("A");
///
///     assert_eq!(a, a);
///
///     let c = intern("B");
///
///     assert_ne!(a, c);
///     assert_ne!(b, c);
///
///     // Prints "A"
///     println!("{}", a);
///
///     let str_a = resolve(a);
///
///     assert_eq!(str_a, "A");
/// }
/// ```
#[derive(Copy, Clone, Eq, Hash, Ord, PartialEq)]
pub struct Symbol(usize);

impl Interner {
    /// Store `string` in this interner if not already cached.
    fn intern<S: AsRef<str>>(&mut self, string: S) -> Symbol {
        let string = string.as_ref();
        match self.index.get(string) {
        | Some(&index) => Symbol(index),
        | None => {
            let owned = string.to_owned().into_boxed_str();
            let leaked = Box::leak(owned);
            let index = self.store.len();
            self.store.push(leaked);
            self.index.insert(leaked, index);
            Symbol(index)
        }
        }
    }

    /// Store static `string` (without leaking memory) in this interner if
    /// not already cached.
    fn intern_static(&mut self, string: &'static str) -> Symbol {
        match self.index.get(string) {
        | Some(&index) => Symbol(index),
        | None => {
            let index = self.store.len();
            self.store.push(string);
            self.index.insert(string, index);
            Symbol(index)
        }
        }
    }

    /// Resolve `symbol` in this interner.
    ///
    /// Panics if `symbol` was produced by this interner.
    fn resolve(&self, symbol: Symbol) -> &'static str {
        self.store[symbol.0]
    }
}

/// Look up `string` in the global cache, and insert it if missing.
pub fn intern<S: AsRef<str>>(string: S) -> Symbol {
    INTERNER.write()
        .expect("[INTERNAL ERROR]: poisoned global interner lock")
        .intern(string)
}

/// Look up static `string` in the global cache, and insert it without allocating
/// if it is missing.
pub fn intern_static(string: &'static str) -> Symbol {
    INTERNER.write()
        .expect("[INTERNAL ERROR]: poisoned global interner lock")
        .intern_static(string)
}

/// Resolve `symbol` to its string representation.
pub fn resolve(symbol: Symbol) -> &'static str {
    INTERNER.read()
        .expect("[INTERNAL ERROR]: poisoned global interner lock")
        .resolve(symbol)
}

impl From<Symbol> for &'static str {
    fn from(symbol: Symbol) -> Self {
        resolve(symbol)
    }
}

impl str::FromStr for Symbol {
    type Err = ();
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(intern(s))
    }
}

impl cmp::PartialOrd for Symbol {
    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
        resolve(*self).partial_cmp(resolve(*other))
    }
}

impl fmt::Debug for Symbol {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{:?}", resolve(*self))
    }
}

impl fmt::Display for Symbol {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{}", resolve(*self))
    }
}

#[cfg(test)]
mod tests {

    use super::{intern, resolve};

    #[test]
    fn test_same() {
        let symbol_a = intern("String");
        let symbol_b = intern("String");
        assert_eq!(symbol_a, symbol_b);
    }

    #[test]
    fn test_different() {
        let symbol_a = intern("StringA");
        let symbol_b = intern("StringB");
        assert_ne!(symbol_a, symbol_b);
    }

    #[test]
    fn test_case() {
        let symbol_a = intern("String");
        let symbol_b = intern("string");
        assert_ne!(symbol_a, symbol_b);
    }

    #[test]
    fn test_resolve() {
        let symbol = intern("abcd");
        let string: &'static str = resolve(symbol);
        assert_eq!("abcd", string);
    }

    #[test]
    fn test_debug() {
        let symbol = intern("Debug");
        assert_eq!(format!("{:?}", symbol), format!("{:?}", "Debug".to_string()));
    }

    #[test]
    fn test_display() {
        let symbol = intern("Display");
        assert_eq!(format!("{}", symbol), format!("{}", "Display".to_string()));
    }

    #[test]
    fn test_cmp() {
        let y = intern("y");
        let z = intern("z");
        let x = intern("x");
        assert!(x < y);
        assert!(y < z);
        assert!(x < z);
    }
}