Skip to main content

xsd_schema/namespace/
table.rs

1//! String interning via NameTable
2//!
3//! The NameTable provides O(1) string interning using a chained hash table.
4//! All names and namespace URIs in the schema model go through this table
5//! to ensure deduplication and fast equality checks via NameId.
6//!
7//! Design per XML_NAME_TABLE.md:
8//! - Entry: {hash, next, text: Box<str>}
9//! - NameId(0) reserved for empty string
10//! - Rehashing when entries.len() > buckets.len()
11//! - Pre-seed with standard namespaces
12//!
13//! ## Interior Mutability
14//!
15//! NameTable uses interior mutability (`RefCell`) to allow adding new strings
16//! through a shared reference (`&self`). This enables runtime string interning
17//! in contexts where only immutable access to the table is available (e.g.,
18//! during XPath function evaluation).
19
20use crate::ids::NameId;
21use std::cell::RefCell;
22use std::collections::hash_map::DefaultHasher;
23use std::hash::{Hash, Hasher};
24
25/// Entry in the name table
26///
27/// # Invariants (for unsafe `resolve_ref` / `try_resolve_ref`)
28///
29/// 1. `text` is immutable after insertion — no API replaces or removes it.
30/// 2. No future API should remove, compact, or overwrite entries.
31///
32/// Note: `hash` and `next` *are* mutated during rehashing; only `text` stability matters.
33#[derive(Debug)]
34struct Entry {
35    /// Cached hash value
36    hash: u64,
37    /// Next entry in chain (-1 = none); mutated during rehashing
38    next: i32,
39    /// The interned string (immutable after insertion — never replaced or removed)
40    text: Box<str>,
41}
42
43/// String interning table for names and namespace URIs
44///
45/// Provides O(1) average-case lookup and insertion.
46/// All strings in the schema model should be interned through this table.
47///
48/// Uses interior mutability to allow adding strings through shared references.
49///
50/// # Example
51///
52/// ```
53/// use xsd_schema::namespace::NameTable;
54///
55/// let table = NameTable::new();
56/// let id1 = table.add("hello");
57/// let id2 = table.add("hello");
58/// assert_eq!(id1, id2); // Same string -> same ID
59/// assert_eq!(table.resolve(id1), "hello");
60/// ```
61#[derive(Debug)]
62pub struct NameTable {
63    /// All entries (indexed by NameId) - uses RefCell for interior mutability
64    entries: RefCell<Vec<Entry>>,
65    /// Hash buckets (index into entries via first entry in chain)
66    buckets: RefCell<Vec<i32>>,
67}
68
69impl NameTable {
70    /// Initial bucket count
71    const INITIAL_BUCKETS: usize = 256;
72
73    /// Create a new name table pre-seeded with standard namespaces
74    pub fn new() -> Self {
75        let table = Self {
76            entries: RefCell::new(Vec::with_capacity(Self::INITIAL_BUCKETS)),
77            buckets: RefCell::new(vec![-1; Self::INITIAL_BUCKETS]),
78        };
79
80        // Pre-seed standard values
81        // NameId(0) = empty string (reserved)
82        table.add("");
83
84        // Standard namespace URIs
85        table.add(XS_NAMESPACE);
86        table.add(XSI_NAMESPACE);
87        table.add(XML_NAMESPACE);
88        table.add(XMLNS_NAMESPACE);
89
90        // Standard prefixes
91        table.add("xs");
92        table.add("xsd");
93        table.add("xsi");
94        table.add("xml");
95        table.add("xmlns");
96
97        // XSI attribute local names
98        table.add("type");
99        table.add("nil");
100        table.add("schemaLocation");
101        table.add("noNamespaceSchemaLocation");
102
103        table
104    }
105
106    /// Add a string to the table, returning its NameId
107    ///
108    /// If the string already exists, returns the existing NameId.
109    /// Otherwise, creates a new entry and returns a new NameId.
110    ///
111    /// Uses interior mutability, so this can be called through a shared reference.
112    pub fn add(&self, value: &str) -> NameId {
113        let hash = Self::hash_str(value);
114
115        // Check if already present (read-only borrow)
116        if let Some(id) = self.find(value, hash) {
117            return id;
118        }
119
120        // Need to insert (mutable borrow)
121        self.insert(value, hash)
122    }
123
124    /// Get the NameId for a string if it exists
125    pub fn get(&self, value: &str) -> Option<NameId> {
126        let hash = Self::hash_str(value);
127        self.find(value, hash)
128    }
129
130    /// Resolve a NameId to a string slice (zero-allocation).
131    ///
132    /// Prefer this over `resolve()` in performance-critical paths
133    /// (e.g., DomNavigator implementations).
134    ///
135    /// # Safety (internal)
136    ///
137    /// Uses `RefCell::as_ptr()` to bypass the borrow guard. This is sound because:
138    /// - `Entry.text` is `Box<str>` — heap-stable allocation
139    /// - `Entry.text` is never replaced or removed after insertion
140    ///   (other Entry fields like `next` may be mutated during rehashing,
141    ///   but only `text` stability matters here)
142    /// - The returned `&str` points to the Box's heap data, not the Vec buffer,
143    ///   so it remains valid even if the Vec reallocates during a later `add()`
144    ///
145    /// # Panics
146    ///
147    /// Panics if the NameId is out of bounds.
148    pub fn resolve_ref(&self, id: NameId) -> &str {
149        unsafe { &(&(*self.entries.as_ptr()))[id.0 as usize].text }
150    }
151
152    /// Try to resolve a NameId to a string slice (zero-allocation).
153    ///
154    /// Returns `None` if the NameId is out of bounds.
155    pub fn try_resolve_ref(&self, id: NameId) -> Option<&str> {
156        unsafe {
157            (&(*self.entries.as_ptr()))
158                .get(id.0 as usize)
159                .map(|e| e.text.as_ref())
160        }
161    }
162
163    /// Resolve a NameId to its string value
164    ///
165    /// Returns a copy of the string. For zero-allocation access, use `resolve_ref()`.
166    ///
167    /// # Panics
168    ///
169    /// Panics if the NameId is invalid (out of bounds).
170    pub fn resolve(&self, id: NameId) -> String {
171        self.resolve_ref(id).to_string()
172    }
173
174    /// Try to resolve a NameId to its string value
175    ///
176    /// Returns a copy of the string. For zero-allocation access, use `try_resolve_ref()`.
177    pub fn try_resolve(&self, id: NameId) -> Option<String> {
178        self.try_resolve_ref(id).map(|s| s.to_string())
179    }
180
181    /// Get the number of interned strings
182    pub fn len(&self) -> usize {
183        self.entries.borrow().len()
184    }
185
186    /// Check if the table is empty
187    pub fn is_empty(&self) -> bool {
188        self.entries.borrow().is_empty()
189    }
190
191    /// Hash a string
192    fn hash_str(value: &str) -> u64 {
193        let mut hasher = DefaultHasher::new();
194        value.hash(&mut hasher);
195        hasher.finish()
196    }
197
198    /// Find an existing entry
199    fn find(&self, value: &str, hash: u64) -> Option<NameId> {
200        let entries = self.entries.borrow();
201        let buckets = self.buckets.borrow();
202
203        let bucket_idx = (hash as usize) % buckets.len();
204        let mut entry_idx = buckets[bucket_idx];
205
206        while entry_idx >= 0 {
207            let entry = &entries[entry_idx as usize];
208            if entry.hash == hash && entry.text.as_ref() == value {
209                return Some(NameId(entry_idx as u32));
210            }
211            entry_idx = entry.next;
212        }
213
214        None
215    }
216
217    /// Insert a new entry
218    fn insert(&self, value: &str, hash: u64) -> NameId {
219        let mut entries = self.entries.borrow_mut();
220        let mut buckets = self.buckets.borrow_mut();
221
222        // Check if we need to rehash
223        if entries.len() >= buckets.len() {
224            Self::rehash_internal(&mut entries, &mut buckets);
225        }
226
227        let id = NameId(entries.len() as u32);
228        let bucket_idx = (hash as usize) % buckets.len();
229
230        // Insert at head of chain
231        let next = buckets[bucket_idx];
232        entries.push(Entry {
233            hash,
234            next,
235            text: value.into(),
236        });
237        buckets[bucket_idx] = id.0 as i32;
238
239        id
240    }
241
242    /// Rehash the table (double bucket count) - internal version
243    #[allow(clippy::ptr_arg)]
244    fn rehash_internal(entries: &mut Vec<Entry>, buckets: &mut Vec<i32>) {
245        let new_size = buckets.len() * 2;
246        *buckets = vec![-1; new_size];
247
248        // Re-insert all entries into new buckets
249        for (idx, entry) in entries.iter_mut().enumerate() {
250            let bucket_idx = (entry.hash as usize) % new_size;
251            entry.next = buckets[bucket_idx];
252            buckets[bucket_idx] = idx as i32;
253        }
254    }
255}
256
257impl Default for NameTable {
258    fn default() -> Self {
259        Self::new()
260    }
261}
262
263// Well-known namespace URIs
264pub const XS_NAMESPACE: &str = "http://www.w3.org/2001/XMLSchema";
265pub const XSI_NAMESPACE: &str = "http://www.w3.org/2001/XMLSchema-instance";
266pub const XML_NAMESPACE: &str = "http://www.w3.org/XML/1998/namespace";
267pub const XMLNS_NAMESPACE: &str = "http://www.w3.org/2000/xmlns/";
268
269/// Well-known NameIds (pre-seeded in NameTable::new())
270pub mod well_known {
271    use crate::ids::NameId;
272
273    /// Empty string
274    pub const EMPTY: NameId = NameId(0);
275
276    /// XSD namespace URI
277    pub const XS_NAMESPACE: NameId = NameId(1);
278
279    /// XSD instance namespace URI
280    pub const XSI_NAMESPACE: NameId = NameId(2);
281
282    /// XML namespace URI
283    pub const XML_NAMESPACE: NameId = NameId(3);
284
285    /// XMLNS namespace URI
286    pub const XMLNS_NAMESPACE: NameId = NameId(4);
287
288    /// "xs" prefix
289    pub const XS_PREFIX: NameId = NameId(5);
290
291    /// "xsd" prefix
292    pub const XSD_PREFIX: NameId = NameId(6);
293
294    /// "xsi" prefix
295    pub const XSI_PREFIX: NameId = NameId(7);
296
297    /// "xml" prefix
298    pub const XML_PREFIX: NameId = NameId(8);
299
300    /// "xmlns" prefix
301    pub const XMLNS_PREFIX: NameId = NameId(9);
302
303    /// XSI local name "type"
304    pub const XSI_TYPE: NameId = NameId(10);
305
306    /// XSI local name "nil"
307    pub const XSI_NIL: NameId = NameId(11);
308
309    /// XSI local name "schemaLocation"
310    pub const XSI_SCHEMA_LOCATION: NameId = NameId(12);
311
312    /// XSI local name "noNamespaceSchemaLocation"
313    pub const XSI_NO_NAMESPACE_SCHEMA_LOCATION: NameId = NameId(13);
314}
315
316#[cfg(test)]
317mod tests {
318    use super::*;
319
320    #[test]
321    fn test_empty_string_is_id_zero() {
322        let table = NameTable::new();
323        assert_eq!(table.resolve(NameId(0)), "");
324    }
325
326    #[test]
327    fn test_add_and_resolve() {
328        let table = NameTable::new();
329        let id = table.add("hello");
330        assert_eq!(table.resolve(id), "hello");
331    }
332
333    #[test]
334    fn test_deduplication() {
335        let table = NameTable::new();
336        let id1 = table.add("hello");
337        let id2 = table.add("hello");
338        assert_eq!(id1, id2);
339    }
340
341    #[test]
342    fn test_different_strings_different_ids() {
343        let table = NameTable::new();
344        let id1 = table.add("hello");
345        let id2 = table.add("world");
346        assert_ne!(id1, id2);
347    }
348
349    #[test]
350    fn test_get_existing() {
351        let table = NameTable::new();
352        let id = table.add("test");
353        assert_eq!(table.get("test"), Some(id));
354    }
355
356    #[test]
357    fn test_get_nonexistent() {
358        let table = NameTable::new();
359        assert_eq!(table.get("nonexistent"), None);
360    }
361
362    #[test]
363    fn test_well_known_namespaces() {
364        let table = NameTable::new();
365        assert_eq!(table.resolve(well_known::XS_NAMESPACE), XS_NAMESPACE);
366        assert_eq!(table.resolve(well_known::XSI_NAMESPACE), XSI_NAMESPACE);
367        assert_eq!(table.resolve(well_known::XML_NAMESPACE), XML_NAMESPACE);
368        assert_eq!(table.resolve(well_known::XMLNS_NAMESPACE), XMLNS_NAMESPACE);
369    }
370
371    #[test]
372    fn test_well_known_prefixes() {
373        let table = NameTable::new();
374        assert_eq!(table.resolve(well_known::XS_PREFIX), "xs");
375        assert_eq!(table.resolve(well_known::XSD_PREFIX), "xsd");
376        assert_eq!(table.resolve(well_known::XSI_PREFIX), "xsi");
377        assert_eq!(table.resolve(well_known::XML_PREFIX), "xml");
378    }
379
380    #[test]
381    fn test_well_known_xsi_local_names() {
382        let table = NameTable::new();
383        assert_eq!(table.resolve(well_known::XSI_TYPE), "type");
384        assert_eq!(table.resolve(well_known::XSI_NIL), "nil");
385        assert_eq!(
386            table.resolve(well_known::XSI_SCHEMA_LOCATION),
387            "schemaLocation"
388        );
389        assert_eq!(
390            table.resolve(well_known::XSI_NO_NAMESPACE_SCHEMA_LOCATION),
391            "noNamespaceSchemaLocation"
392        );
393    }
394
395    #[test]
396    fn test_rehashing() {
397        let table = NameTable::new();
398        // Insert enough entries to trigger rehashing
399        for i in 0..1000 {
400            let s = format!("string_{}", i);
401            table.add(&s);
402        }
403        // Verify all strings can still be found
404        for i in 0..1000 {
405            let s = format!("string_{}", i);
406            assert!(table.get(&s).is_some(), "Failed to find: {}", s);
407        }
408    }
409
410    #[test]
411    fn test_unicode_strings() {
412        let table = NameTable::new();
413        let id1 = table.add("日本語");
414        let id2 = table.add("日本語");
415        assert_eq!(id1, id2);
416        assert_eq!(table.resolve(id1), "日本語");
417    }
418
419    #[test]
420    fn test_interior_mutability() {
421        // Test that add works through shared reference
422        let table = NameTable::new();
423        let table_ref = &table;
424        let id1 = table_ref.add("through_ref");
425        let id2 = table_ref.add("through_ref");
426        assert_eq!(id1, id2);
427        assert_eq!(table.resolve(id1), "through_ref");
428    }
429
430    #[test]
431    fn test_resolve_ref_basic() {
432        let table = NameTable::new();
433        let id = table.add("hello");
434        assert_eq!(table.resolve_ref(id), "hello");
435        assert_eq!(table.resolve_ref(NameId(0)), "");
436    }
437
438    #[test]
439    fn test_try_resolve_ref_basic() {
440        let table = NameTable::new();
441        let id = table.add("test");
442        assert_eq!(table.try_resolve_ref(id), Some("test"));
443        assert_eq!(table.try_resolve_ref(NameId(9999)), None);
444    }
445
446    #[test]
447    fn test_resolve_ref_stability_across_add() {
448        let table = NameTable::new();
449        let id = table.add("stable_string");
450        let resolved: &str = table.resolve_ref(id);
451        // Trigger multiple Vec reallocations
452        for i in 0..500 {
453            table.add(&format!("extra_{i}"));
454        }
455        assert_eq!(resolved, "stable_string");
456    }
457
458    #[test]
459    fn test_try_resolve_ref_stability_across_add() {
460        let table = NameTable::new();
461        let id = table.add("test_str");
462        let resolved: Option<&str> = table.try_resolve_ref(id);
463        // Trigger multiple Vec reallocations
464        for i in 0..500 {
465            table.add(&format!("filler_{i}"));
466        }
467        assert_eq!(resolved, Some("test_str"));
468    }
469}