plotnik_compiler/emit/
string_table.rs

//! String table builder for bytecode emission.
//!
//! Builds the string table section, remapping query Symbols to bytecode StringIds.
4
5use std::collections::HashMap;
6
7use plotnik_core::{Interner, Symbol};
8
9use plotnik_bytecode::StringId;
10
11use super::EmitError;
12
/// Placeholder text stored at index 0 of the string table (a line from
/// Dostoevsky's *The Idiot*).
///
/// `StringId(0)` is reserved for this entry; instructions never reference it.
pub const EASTER_EGG: &str = "Beauty will save the world";
16
17/// Builds the string table, remapping query Symbols to bytecode StringIds.
18///
19/// The bytecode format requires a subset of the query interner's strings.
20/// This builder collects only the strings that are actually used and assigns
21/// compact StringId indices.
22///
23/// StringId(0) is reserved for an easter egg and is never referenced by
24/// instructions. Actual strings start at index 1.
25#[derive(Debug)]
26pub struct StringTableBuilder {
27    /// Map from query Symbol to bytecode StringId.
28    mapping: HashMap<Symbol, StringId>,
29    /// Reverse lookup from string content to StringId (for intern_str).
30    str_lookup: HashMap<String, StringId>,
31    /// Ordered strings for the binary.
32    strings: Vec<String>,
33}
34
35impl StringTableBuilder {
36    pub fn new() -> Self {
37        let mut builder = Self {
38            mapping: HashMap::new(),
39            str_lookup: HashMap::new(),
40            strings: Vec::new(),
41        };
42        // Reserve index 0 for easter egg (never looked up via str_lookup)
43        builder.strings.push(EASTER_EGG.to_string());
44        builder
45    }
46
47    /// Get or create a StringId for a Symbol.
48    pub fn get_or_intern(
49        &mut self,
50        sym: Symbol,
51        interner: &Interner,
52    ) -> Result<StringId, EmitError> {
53        if let Some(&id) = self.mapping.get(&sym) {
54            return Ok(id);
55        }
56
57        let text = interner
58            .try_resolve(sym)
59            .ok_or(EmitError::StringNotFound(sym))?;
60
61        let id = StringId::new(self.strings.len() as u16);
62        self.strings.push(text.to_string());
63        self.str_lookup.insert(text.to_string(), id);
64        self.mapping.insert(sym, id);
65        Ok(id)
66    }
67
68    /// Intern a string directly (for generated strings not in the query interner).
69    pub fn intern_str(&mut self, s: &str) -> StringId {
70        if let Some(&id) = self.str_lookup.get(s) {
71            return id;
72        }
73
74        let id = StringId::new(self.strings.len() as u16);
75        self.strings.push(s.to_string());
76        self.str_lookup.insert(s.to_string(), id);
77        id
78    }
79
80    /// Number of interned strings.
81    pub fn len(&self) -> usize {
82        self.strings.len()
83    }
84
85    /// Whether the builder is empty.
86    pub fn is_empty(&self) -> bool {
87        self.strings.is_empty()
88    }
89
90    /// Validate that the string count fits in u16.
91    pub fn validate(&self) -> Result<(), EmitError> {
92        // Max count is 65534 because the table needs count+1 entries.
93        // Index 0 is reserved for the easter egg, so we can have 65533 user strings.
94        if self.strings.len() > 65534 {
95            return Err(EmitError::TooManyStrings(self.strings.len()));
96        }
97        Ok(())
98    }
99
100    /// Get the StringId for a Symbol, if it was interned.
101    pub fn get(&self, sym: Symbol) -> Option<StringId> {
102        self.mapping.get(&sym).copied()
103    }
104
105    /// Look up a string by its StringId.
106    pub fn get_str(&self, id: StringId) -> &str {
107        &self.strings[id.get() as usize]
108    }
109
110    /// Emit the string blob and offset table.
111    ///
112    /// Returns (blob_bytes, table_bytes).
113    pub fn emit(&self) -> (Vec<u8>, Vec<u8>) {
114        let mut blob = Vec::new();
115        let mut offsets: Vec<u32> = Vec::with_capacity(self.strings.len() + 1);
116
117        for s in &self.strings {
118            offsets.push(blob.len() as u32);
119            blob.extend_from_slice(s.as_bytes());
120        }
121        offsets.push(blob.len() as u32); // sentinel
122
123        // Convert offsets to bytes
124        let table_bytes: Vec<u8> = offsets.iter().flat_map(|o| o.to_le_bytes()).collect();
125
126        (blob, table_bytes)
127    }
128}
129
130impl Default for StringTableBuilder {
131    fn default() -> Self {
132        Self::new()
133    }
134}