1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
//! Interned symbols

use std::num::NonZeroU32;
use std::sync::atomic::{AtomicU32, Ordering};

use hashbrown::HashMap;

mod pin_buf;

use self::pin_buf::PinBuf;

/// An interned symbol.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub struct Symbol {
    idx: SymbolIndex,
}

impl Symbol {
    fn new(brand: NonZeroU32, idx: usize) -> Self {
        Symbol {
            idx: SymbolIndex::new(brand, idx),
        }
    }
}

#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
struct SymbolIndex {
    brand: NonZeroU32,
    idx: u32,
}

impl SymbolIndex {
    fn new(brand: NonZeroU32, idx: usize) -> Self {
        debug_assert!(idx < u32::max_value() as usize);
        SymbolIndex {
            brand,
            idx: idx as u32,
        }
    }

    fn index(self) -> usize {
        self.idx as usize
    }
}

/// String interner
///
/// The interner places strings into non-moving buffers to reduce allocations
/// for small input.
///
/// The `&'static str`s in this structure actually points into `buf`.
#[derive(Debug)]
pub struct Interner {
    buf: PinBuf,

    /// "Branding" constant to avoid mixing of symbols from other instances.
    brand: NonZeroU32,

    lookup: HashMap<&'static str, Symbol>,
    strings: Vec<&'static str>,
}

static BRAND_COUNTER: AtomicU32 = AtomicU32::new(1);

impl Interner {
    pub fn new() -> Self {
        let brand = BRAND_COUNTER.fetch_add(1, Ordering::AcqRel);
        let brand =
            NonZeroU32::new(brand).expect("amount of interners created should be reasonable");

        Interner {
            buf: PinBuf::new(),
            brand,
            lookup: HashMap::new(),
            strings: Vec::new(),
        }
    }

    /// Get or intern a symbol.
    pub fn intern(&mut self, string: &str) -> Symbol {
        // SAFETY: The str returned from PinBuf is equal to input. Static
        // lifetime restricted to this type.
        unsafe {
            let Interner {
                buf,
                brand,
                lookup,
                strings,
            } = self;

            let (_, sym) = lookup.raw_entry_mut().from_key(string).or_insert_with(|| {
                let ptr = buf.push_str(string);
                let key = &*ptr;
                let idx = strings.len();
                strings.push(key);
                (key, Symbol::new(*brand, idx))
            });

            *sym
        }
    }

    /// Get the string value of an interned symbol.
    ///
    /// # Panics
    ///
    /// If `symbol` was not interned in this specific instance of `Interner`.
    pub fn read(&self, symbol: Symbol) -> &str {
        assert_eq!(self.brand, symbol.idx.brand, "should be the same brand");
        self.strings[symbol.idx.index()]
    }
}

impl Default for Interner {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn can_intern_symbols() {
        let mut interner = Interner::new();

        let sym_1_a = interner.intern("foo");
        let sym_2_a = interner.intern("bar");
        let sym_3_a = interner.intern("baz");
        let sym_1_b = interner.intern("foo");
        let sym_2_b = interner.intern("bar");
        let sym_3_b = interner.intern("baz");

        assert_eq!(sym_1_a, sym_1_b);
        assert_eq!(sym_2_a, sym_2_b);
        assert_eq!(sym_3_a, sym_3_b);

        assert_ne!(sym_1_a, sym_2_a);
        assert_ne!(sym_2_a, sym_3_a);
        assert_ne!(sym_1_a, sym_3_a);

        assert_eq!("foo", interner.read(sym_1_a));
        assert_eq!("bar", interner.read(sym_2_a));
        assert_eq!("baz", interner.read(sym_3_a));
    }
}