intern_string/lib.rs
1use fxhash::{FxBuildHasher, FxHashMap};
2
3#[derive(Default)]
4pub struct Intern<'a> {
5 data: FxHashMap<&'a str, InternId>,
6 list: Vec<Box<str>>,
7}
8
9pub type InternId = u32;
10
11impl Intern<'_> {
12 /// Create a new intern table.
13 pub fn new() -> Self {
14 Self {
15 data: FxHashMap::default(),
16 list: Vec::new(),
17 }
18 }
19
20 /// Create a new intern table with the given capacity.
21 pub fn with_capacity(capacity: usize) -> Self {
22 Self {
23 data: FxHashMap::with_capacity_and_hasher(capacity, FxBuildHasher::default()),
24 list: Vec::with_capacity(capacity),
25 }
26 }
27
28 /// Intern a string.
29 /// Returns the interned id.
30 /// If the string is already interned, returns the existing id.
31 /// The string is stored in the intern table for the lifetime of the program.
32 /// The id is a 32-bit integer, so there can be at most 2^32 unique strings interned.
33 /// If the limit is reached, this function will panic.
34 /// The id is guaranteed to be unique for the lifetime of the program.
35 ///
36 /// ## Examples
37 ///
38 /// ```
39 /// use intern_string::Intern;
40 ///
41 /// let mut intern = Intern::new();
42 /// let id = intern.intern("hello");
43 /// assert_eq!(intern.lookup(id), "hello");
44 /// ```
45 #[inline]
46 pub fn intern<V: Into<String> + AsRef<str>>(&mut self, input: V) -> InternId {
47 if let Some(&id) = self.data.get(input.as_ref()) {
48 return id;
49 }
50
51 let owned = input.into().into_boxed_str();
52
53 let str_data = owned.as_ptr();
54 let str_len = owned.len();
55
56 let id = self.list.len() as InternId;
57 self.list.push(owned);
58
59 // SAFETY: we can do this because the allocations inside of a Box<str>
60 // are stable, and so passing ownership to push does not change the
61 // address.
62 //
63 // additionally, because we have not touched the string since we created
64 // these raw pointers ourselves, we know that it is valid UTF-8 and so
65 // can skip that check as well.
66 let k =
67 unsafe { std::str::from_utf8_unchecked(std::slice::from_raw_parts(str_data, str_len)) };
68
69 self.data.insert(k, id);
70 id
71 }
72
73 /// Lookup the interned string by id.
74 ///
75 /// # Panics
76 ///
77 /// Panics if the id is not valid.
78 ///
79 /// # Examples
80 ///
81 /// ```
82 /// use intern_string::Intern;
83 ///
84 /// let mut intern = Intern::new();
85 /// let id = intern.intern("hello");
86 /// assert_eq!(intern.lookup(id), "hello");
87 /// ```
88 #[inline]
89 pub fn lookup(&self, id: InternId) -> &str {
90 &self.list[id as usize]
91 }
92
93 /// Lookup the interned string by id.
94 /// Returns `None` if the id is not valid.
95 ///
96 /// # Examples
97 ///
98 /// ```
99 /// use intern_string::Intern;
100 ///
101 /// let mut intern = Intern::new();
102 /// let id = intern.intern("hello");
103 /// assert_eq!(intern.try_lookup(id), Some("hello"));
104 /// ```
105 #[inline]
106 pub fn try_lookup(&self, id: InternId) -> Option<&str> {
107 self.list.get(id as usize).map(|s| &**s)
108 }
109}
110
111#[cfg(test)]
112mod tests {
113 use super::*;
114
115 #[test]
116 fn interns_and_handles_lookup() {
117 let mut interner = Intern::new();
118 let id = interner.intern("hello");
119 assert_eq!(interner.lookup(id), "hello");
120 assert_eq!(interner.try_lookup(id), Some("hello"));
121 }
122
123 #[test]
124 fn reallocate() {
125 let mut interner = Intern::with_capacity(1);
126 let id1 = interner.intern("hello");
127
128 // this should reallocate the internal list
129 let id2 = interner.intern("world");
130
131 assert_eq!(interner.lookup(id1), "hello");
132 assert_eq!(interner.try_lookup(id1), Some("hello"));
133 assert_eq!(interner.lookup(id2), "world");
134 assert_eq!(interner.try_lookup(id2), Some("world"));
135 }
136}