Skip to main content

oxilean_kernel/string_intern/
pool.rs

1//! `InternPool` — backing storage for the string interning table.
2
3use std::collections::HashMap;
4
5/// Statistics snapshot for an `InternPool`.
6#[derive(Debug, Clone, Copy, PartialEq, Eq)]
7pub struct InternPoolStats {
8    /// Number of distinct strings stored.
9    pub len: usize,
10    /// Sum of byte lengths of all stored strings (not including null terminators).
11    pub total_bytes: usize,
12}
13
14/// The raw storage for interned strings.
15///
16/// - `strings` is append-only; indices are stable for the lifetime of the pool.
17/// - `map` maps each string's content to its index for O(1) deduplication.
18/// - `static_ptrs` caches the `&'static str` obtained by leaking each string
19///   exactly once, so subsequent `resolve` calls are O(1) slice lookups.
20pub struct InternPool {
21    /// All interned strings in insertion order.
22    pub(super) strings: Vec<String>,
23    /// Reverse map: content → index.
24    pub(super) map: HashMap<String, u32>,
25    /// Leaked static pointers, one per interned string (lazy-filled).
26    pub(super) static_ptrs: Vec<Option<&'static str>>,
27}
28
29impl InternPool {
30    /// Creates a new, empty `InternPool`.
31    pub fn new() -> Self {
32        Self {
33            strings: Vec::new(),
34            map: HashMap::new(),
35            static_ptrs: Vec::new(),
36        }
37    }
38
39    /// Interns `s`, returning a stable index.
40    ///
41    /// If `s` is already present the existing index is returned without
42    /// any allocation. Otherwise a new `String` is pushed and indexed.
43    pub fn intern_str(&mut self, s: &str) -> u32 {
44        if let Some(&id) = self.map.get(s) {
45            return id;
46        }
47        let id = self.strings.len() as u32;
48        self.strings.push(s.to_string());
49        self.static_ptrs.push(None);
50        self.map.insert(s.to_string(), id);
51        id
52    }
53
54    /// Resolves index `id` to a `&'static str`.
55    ///
56    /// The string is leaked into a `Box<str>` on the first call for `id` and
57    /// the resulting pointer is cached. Subsequent calls for the same `id`
58    /// return the cached pointer without any allocation.
59    ///
60    /// Returns `None` if `id` is out of range.
61    pub fn resolve_str(&mut self, id: u32) -> Option<&'static str> {
62        let idx = id as usize;
63        if idx >= self.strings.len() {
64            return None;
65        }
66        // Use cached static pointer if available.
67        if let Some(ptr) = self.static_ptrs[idx] {
68            return Some(ptr);
69        }
70        // Leak a copy as a Box<str> → &'static str.
71        let leaked: &'static str = Box::leak(self.strings[idx].clone().into_boxed_str());
72        self.static_ptrs[idx] = Some(leaked);
73        Some(leaked)
74    }
75
76    /// Returns the number of distinct strings stored.
77    pub fn len(&self) -> usize {
78        self.strings.len()
79    }
80
81    /// Returns `true` when no strings have been interned.
82    pub fn is_empty(&self) -> bool {
83        self.strings.is_empty()
84    }
85
86    /// Returns the total byte count of all interned strings.
87    pub fn total_bytes(&self) -> usize {
88        self.strings.iter().map(|s| s.len()).sum()
89    }
90
91    /// Returns a `InternPoolStats` snapshot.
92    pub fn stats(&self) -> InternPoolStats {
93        InternPoolStats {
94            len: self.len(),
95            total_bytes: self.total_bytes(),
96        }
97    }
98}
99
100impl Default for InternPool {
101    fn default() -> Self {
102        Self::new()
103    }
104}
105
106#[cfg(test)]
107mod tests {
108    use super::*;
109
110    #[test]
111    fn test_pool_intern_returns_same_index_for_same_string() {
112        let mut pool = InternPool::new();
113        let id1 = pool.intern_str("hello");
114        let id2 = pool.intern_str("hello");
115        assert_eq!(id1, id2);
116    }
117
118    #[test]
119    fn test_pool_intern_returns_different_indices_for_different_strings() {
120        let mut pool = InternPool::new();
121        let id1 = pool.intern_str("hello");
122        let id2 = pool.intern_str("world");
123        assert_ne!(id1, id2);
124    }
125
126    #[test]
127    fn test_pool_resolve_returns_correct_string() {
128        let mut pool = InternPool::new();
129        let id = pool.intern_str("kernel");
130        let s = pool.resolve_str(id);
131        assert_eq!(s, Some("kernel"));
132    }
133
134    #[test]
135    fn test_pool_resolve_out_of_range_returns_none() {
136        let mut pool = InternPool::new();
137        assert!(pool.resolve_str(99).is_none());
138    }
139
140    #[test]
141    fn test_pool_len_counts_unique_strings() {
142        let mut pool = InternPool::new();
143        pool.intern_str("a");
144        pool.intern_str("b");
145        pool.intern_str("a"); // duplicate
146        assert_eq!(pool.len(), 2);
147    }
148
149    #[test]
150    fn test_pool_total_bytes() {
151        let mut pool = InternPool::new();
152        pool.intern_str("abc"); // 3 bytes
153        pool.intern_str("de"); // 2 bytes
154        assert_eq!(pool.total_bytes(), 5);
155    }
156
157    #[test]
158    fn test_pool_stats_snapshot() {
159        let mut pool = InternPool::new();
160        pool.intern_str("hello");
161        pool.intern_str("world");
162        let stats = pool.stats();
163        assert_eq!(stats.len, 2);
164        assert_eq!(stats.total_bytes, 10);
165    }
166
167    #[test]
168    fn test_pool_is_empty_initially() {
169        let pool = InternPool::new();
170        assert!(pool.is_empty());
171    }
172
173    #[test]
174    fn test_pool_default_is_empty() {
175        let pool = InternPool::default();
176        assert!(pool.is_empty());
177    }
178
179    #[test]
180    fn test_pool_sequential_indices() {
181        let mut pool = InternPool::new();
182        let ids: Vec<u32> = ["x", "y", "z"].iter().map(|s| pool.intern_str(s)).collect();
183        assert_eq!(ids, vec![0, 1, 2]);
184    }
185
186    #[test]
187    fn test_pool_empty_string_internable() {
188        let mut pool = InternPool::new();
189        let id = pool.intern_str("");
190        assert_eq!(pool.resolve_str(id), Some(""));
191    }
192
193    #[test]
194    fn test_pool_unicode_string() {
195        let mut pool = InternPool::new();
196        let id = pool.intern_str("日本語");
197        assert_eq!(pool.resolve_str(id), Some("日本語"));
198    }
199
200    #[test]
201    fn test_pool_resolve_same_pointer_on_repeated_calls() {
202        let mut pool = InternPool::new();
203        let id = pool.intern_str("stable");
204        let ptr1 = pool.resolve_str(id).map(|s| s.as_ptr());
205        let ptr2 = pool.resolve_str(id).map(|s| s.as_ptr());
206        assert_eq!(
207            ptr1, ptr2,
208            "resolve must return the same static pointer each time"
209        );
210    }
211}