mini_intern/
lib.rs

1use std::collections::HashMap;
2
3/// A struct responsible for efficiently interning strings
4///
5/// # Examples
6/// ```
7/// use mini_intern::Interner;
8/// const TEST_CASE: &str = "test_case";
9/// let mut interner = <Interner<u32>>::with_capacity(64);
10/// _ = interner.intern("something");
11/// _ = interner.intern("something_else");
12/// _ = interner.intern("something_else_again");
13/// _ = interner.intern(TEST_CASE);
14/// assert_eq!(interner.intern(TEST_CASE), 3);
15/// ```
16// pub struct Interner<I, H = DefaultHasher> {
17// TODO: allow plugging in a custom hasher.
18#[derive(Debug)]
19pub struct Interner<I = u32> {
20    map: HashMap<&'static str, I>,
21    vec: Vec<&'static str>,
22    buf: String,
23    full: Vec<String>,
24}
25
26impl<I> Interner<I>
27where
28    I: From<u32> + Copy,
29    u32: From<I>,
30{
31    /// Creates an [`Interner`] with the specified capacity in memory. Useful for
32    /// pre-allocating space if the size of the items to be immediately interned is known.
33    ///
34    /// # Examples
35    /// ```
36    /// use mini_intern::Interner;
37    /// let interner: Interner = Interner::with_capacity(32);
38    /// ```
39    pub fn with_capacity(cap: usize) -> Self {
40        Interner {
41            map: Default::default(),
42            vec: Vec::new(),
43            buf: String::with_capacity(cap.next_power_of_two()),
44            full: Vec::new(),
45        }
46    }
47
48    /// Interns the given value. If the value already exists in the interner,
49    /// its ID is returned and no allocation is performed. If it does not exist,
50    /// the value is interned and a new ID is created for it.
51    ///
52    /// If the intern space is below the maximum capacity, no allocation occurs.
53    ///
54    /// # Examples
55    /// ```
56    /// use mini_intern::Interner;
57    /// let mut interner: Interner<u32> = Interner::with_capacity(32);
58    /// interner.intern("hello");
59    /// interner.intern("and");
60    /// interner.intern("good");
61    /// interner.intern("morning");
62    /// assert_eq!(interner.intern("good"), 2u32);
63    /// ```
64    pub fn intern(&mut self, name: &str) -> I {
65        match self.map.get(name) {
66            Some(id) => (*id).into(),
67            None => {
68                let name = self.alloc(name);
69                let id = self.map.len() as u32;
70                self.map.insert(name, id.into());
71                self.vec.push(name);
72
73                id.into()
74            }
75        }
76    }
77
78    fn alloc(&mut self, name: &str) -> &'static str {
79        let cap = self.buf.capacity();
80        if cap < self.buf.len() + name.len() {
81            let new_cap = (cap.max(name.len()) + 1).next_power_of_two();
82            let new_buf = String::with_capacity(new_cap);
83            let old_buf = std::mem::replace(&mut self.buf, new_buf);
84            self.full.push(old_buf);
85        }
86        let interned = {
87            let start = self.buf.len();
88            self.buf.push_str(name);
89            &self.buf[start..]
90        };
91
92        unsafe { &*(interned as *const str) }
93    }
94
95    pub fn lookup(&self, id: I) -> &'static str {
96        let index: u32 = u32::from(id);
97        self.vec[index as usize]
98    }
99}
100#[cfg(test)]
101mod tests {
102    #[test]
103    fn intern() {
104        use super::Interner;
105        const TEST_CASE: &str = "test_case";
106        const DUMMY_ONE: &str = "dummy_one";
107        const DUMMY_TWO: &str = "dummy_two";
108        const DUMMY_THREE: &str = "dummy_three";
109
110        let mut interner = <Interner<u32>>::with_capacity(4);
111        _ = interner.intern(DUMMY_ONE);
112        _ = interner.intern(DUMMY_TWO);
113        _ = interner.intern(DUMMY_THREE);
114        _ = interner.intern(TEST_CASE);
115        assert_eq!(interner.intern(TEST_CASE), 3)
116    }
117
118    #[test]
119    fn lookup() {
120        use super::Interner;
121        let mut interner: Interner<u32> = Interner::with_capacity(32);
122        interner.intern("hello");
123        interner.intern("and");
124        interner.intern("good");
125        interner.intern("morning");
126        assert_eq!(interner.lookup(2), "good");
127    }
128}