plotnik_lib/ir/
strings.rs

//! String interning for compiled queries.
//!
//! Identical strings share storage and ID. Used for field names, variant tags,
//! entrypoint names, and type names.
5
6use std::collections::HashMap;
7
8use super::ids::StringId;
9
10/// String interner for query compilation.
11///
12/// Interns strings during the analysis phase, then emits them as a contiguous
13/// byte pool with `StringRef` entries pointing into it.
14#[derive(Debug, Default)]
15pub struct StringInterner<'src> {
16    /// Map from string content to assigned ID.
17    map: HashMap<&'src str, StringId>,
18    /// Strings in ID order for emission.
19    strings: Vec<&'src str>,
20}
21
22impl<'src> StringInterner<'src> {
23    /// Creates a new empty interner.
24    pub fn new() -> Self {
25        Self::default()
26    }
27
28    /// Interns a string, returning its ID.
29    ///
30    /// If the string was previously interned, returns the existing ID.
31    pub fn intern(&mut self, s: &'src str) -> StringId {
32        if let Some(&id) = self.map.get(s) {
33            return id;
34        }
35
36        let id = self.strings.len() as StringId;
37        assert!(id < 0xFFFF, "string pool overflow (>65534 strings)");
38
39        self.map.insert(s, id);
40        self.strings.push(s);
41        id
42    }
43
44    /// Returns the ID of a previously interned string, or `None`.
45    pub fn get(&self, s: &str) -> Option<StringId> {
46        self.map.get(s).copied()
47    }
48
49    /// Returns the string for a given ID.
50    ///
51    /// # Panics
52    /// Panics if the ID is out of range.
53    pub fn resolve(&self, id: StringId) -> &'src str {
54        self.strings[id as usize]
55    }
56
57    /// Returns the number of interned strings.
58    pub fn len(&self) -> usize {
59        self.strings.len()
60    }
61
62    /// Returns true if no strings have been interned.
63    pub fn is_empty(&self) -> bool {
64        self.strings.is_empty()
65    }
66
67    /// Returns an iterator over (id, string) pairs in ID order.
68    pub fn iter(&self) -> impl Iterator<Item = (StringId, &'src str)> + '_ {
69        self.strings
70            .iter()
71            .enumerate()
72            .map(|(i, s)| (i as StringId, *s))
73    }
74
75    /// Returns the total byte size needed for all strings.
76    pub fn total_bytes(&self) -> usize {
77        self.strings.iter().map(|s| s.len()).sum()
78    }
79
80    /// Consumes the interner and returns strings in ID order.
81    pub fn into_strings(self) -> Vec<&'src str> {
82        self.strings
83    }
84}
85
#[cfg(test)]
mod tests {
    use super::*;

    /// Interning the same content twice yields one entry and one ID.
    #[test]
    fn intern_deduplicates() {
        let mut pool = StringInterner::new();

        let foo_first = pool.intern("foo");
        let bar = pool.intern("bar");
        let foo_again = pool.intern("foo");

        assert_eq!(foo_first, 0);
        assert_eq!(bar, 1);
        // The repeated "foo" maps back to the ID it received the first time.
        assert_eq!(foo_again, 0);
        assert_eq!(pool.len(), 2);
    }

    /// IDs resolve back to the strings they were assigned to.
    #[test]
    fn resolve_works() {
        let mut pool = StringInterner::new();
        for word in ["hello", "world"] {
            pool.intern(word);
        }

        assert_eq!(pool.resolve(0), "hello");
        assert_eq!(pool.resolve(1), "world");
    }

    /// Looking up a string that was never interned yields `None`.
    #[test]
    fn get_returns_none_for_unknown() {
        let pool = StringInterner::new();
        let looked_up = pool.get("unknown");
        assert_eq!(looked_up, None);
    }

    /// Duplicate strings are counted once in the byte total.
    #[test]
    fn total_bytes() {
        let mut pool = StringInterner::new();
        // "foo" = 3 bytes, "hello" = 5 bytes, the second "foo" is deduplicated.
        for word in ["foo", "hello", "foo"] {
            pool.intern(word);
        }

        assert_eq!(pool.total_bytes(), 8);
    }

    /// Iteration yields `(id, string)` pairs in the order of first interning.
    #[test]
    fn iter_order() {
        let mut pool = StringInterner::new();
        for word in ["a", "b", "c"] {
            pool.intern(word);
        }

        let expected = vec![(0, "a"), (1, "b"), (2, "c")];
        let actual: Vec<_> = pool.iter().collect();
        assert_eq!(actual, expected);
    }
}