Skip to main content

velesdb_core/column_store/
string_table.rs

//! String interning table for fast string comparisons.
//!
//! # Safety (EPIC-032/US-010)
//!
//! `StringId` uses `u32` internally, limiting the table to ~4 billion strings.
//! The `intern()` method panics if this limit is exceeded.
7
8use rustc_hash::FxHashMap;
9
10use super::types::StringId;
11
12/// String interning table for fast string comparisons.
13#[derive(Debug, Default)]
14pub struct StringTable {
15    /// String to ID mapping
16    string_to_id: FxHashMap<String, StringId>,
17    /// ID to string mapping (for retrieval)
18    id_to_string: Vec<String>,
19}
20
21impl StringTable {
22    /// Creates a new empty string table.
23    #[must_use]
24    pub fn new() -> Self {
25        Self::default()
26    }
27
28    /// Interns a string, returning its ID.
29    ///
30    /// If the string already exists, returns the existing ID.
31    ///
32    /// # Panics
33    ///
34    /// Panics if the table contains more than `u32::MAX` strings (EPIC-032/US-010).
35    pub fn intern(&mut self, s: &str) -> StringId {
36        if let Some(&id) = self.string_to_id.get(s) {
37            return id;
38        }
39
40        // EPIC-032/US-010: Safe bounds check before truncating cast
41        let len = self.id_to_string.len();
42        assert!(
43            len < u32::MAX as usize,
44            "StringTable overflow: cannot intern more than {} strings",
45            u32::MAX
46        );
47        #[allow(clippy::cast_possible_truncation)] // Bounds checked above
48        let id = StringId(len as u32);
49        self.id_to_string.push(s.to_string());
50        self.string_to_id.insert(s.to_string(), id);
51        id
52    }
53
54    /// Gets the string for an ID.
55    #[must_use]
56    pub fn get(&self, id: StringId) -> Option<&str> {
57        self.id_to_string.get(id.0 as usize).map(String::as_str)
58    }
59
60    /// Gets the ID for a string without interning.
61    #[must_use]
62    pub fn get_id(&self, s: &str) -> Option<StringId> {
63        self.string_to_id.get(s).copied()
64    }
65
66    /// Returns the number of interned strings.
67    #[must_use]
68    pub fn len(&self) -> usize {
69        self.id_to_string.len()
70    }
71
72    /// Returns true if the table is empty.
73    #[must_use]
74    pub fn is_empty(&self) -> bool {
75        self.id_to_string.is_empty()
76    }
77}