plotnik_compiler/compile/
compiler.rs

1//! Core compiler state and entry points.
2
3use indexmap::IndexMap;
4use plotnik_core::{Interner, NodeFieldId, NodeTypeId, Symbol};
5
6use crate::analyze::symbol_table::SymbolTable;
7use crate::analyze::type_check::{DefId, TypeContext};
8use plotnik_bytecode::Nav;
9use crate::bytecode::{InstructionIR, Label, ReturnIR, TrampolineIR};
10use crate::emit::StringTableBuilder;
11use crate::parser::Expr;
12
13use super::capture::CaptureEffects;
14use super::error::{CompileError, CompileResult};
15use super::scope::StructScope;
16
17/// Compiler state for Thompson construction.
18pub struct Compiler<'a> {
19    pub(super) interner: &'a Interner,
20    pub(super) type_ctx: &'a TypeContext,
21    pub(crate) symbol_table: &'a SymbolTable,
22    pub(super) strings: &'a mut StringTableBuilder,
23    pub(super) node_type_ids: Option<&'a IndexMap<Symbol, NodeTypeId>>,
24    pub(super) node_field_ids: Option<&'a IndexMap<Symbol, NodeFieldId>>,
25    pub(super) instructions: Vec<InstructionIR>,
26    pub(crate) next_label_id: u32,
27    pub(super) def_entries: IndexMap<DefId, Label>,
28    /// Stack of active struct scopes for capture lookup.
29    /// Innermost scope is at the end.
30    pub(super) scope_stack: Vec<StructScope>,
31}
32
33/// Builder for `Compiler`.
34pub struct CompilerBuilder<'a> {
35    interner: &'a Interner,
36    type_ctx: &'a TypeContext,
37    symbol_table: &'a SymbolTable,
38    strings: &'a mut StringTableBuilder,
39    node_type_ids: Option<&'a IndexMap<Symbol, NodeTypeId>>,
40    node_field_ids: Option<&'a IndexMap<Symbol, NodeFieldId>>,
41}
42
43impl<'a> CompilerBuilder<'a> {
44    /// Create a new builder with required parameters.
45    pub fn new(
46        interner: &'a Interner,
47        type_ctx: &'a TypeContext,
48        symbol_table: &'a SymbolTable,
49        strings: &'a mut StringTableBuilder,
50    ) -> Self {
51        Self {
52            interner,
53            type_ctx,
54            symbol_table,
55            strings,
56            node_type_ids: None,
57            node_field_ids: None,
58        }
59    }
60
61    /// Set node type and field IDs for linked compilation.
62    pub fn linked(
63        mut self,
64        node_type_ids: &'a IndexMap<Symbol, NodeTypeId>,
65        node_field_ids: &'a IndexMap<Symbol, NodeFieldId>,
66    ) -> Self {
67        self.node_type_ids = Some(node_type_ids);
68        self.node_field_ids = Some(node_field_ids);
69        self
70    }
71
72    /// Build the Compiler.
73    pub fn build(self) -> Compiler<'a> {
74        Compiler {
75            interner: self.interner,
76            type_ctx: self.type_ctx,
77            symbol_table: self.symbol_table,
78            strings: self.strings,
79            node_type_ids: self.node_type_ids,
80            node_field_ids: self.node_field_ids,
81            instructions: Vec::new(),
82            next_label_id: 0,
83            def_entries: IndexMap::new(),
84            scope_stack: Vec::new(),
85        }
86    }
87}
88
89impl<'a> Compiler<'a> {
90    /// Create a builder for Compiler.
91    pub fn builder(
92        interner: &'a Interner,
93        type_ctx: &'a TypeContext,
94        symbol_table: &'a SymbolTable,
95        strings: &'a mut StringTableBuilder,
96    ) -> CompilerBuilder<'a> {
97        CompilerBuilder::new(interner, type_ctx, symbol_table, strings)
98    }
99
100    /// Compile all definitions in the query.
101    pub fn compile(
102        interner: &'a Interner,
103        type_ctx: &'a TypeContext,
104        symbol_table: &'a SymbolTable,
105        strings: &'a mut StringTableBuilder,
106        node_type_ids: Option<&'a IndexMap<Symbol, NodeTypeId>>,
107        node_field_ids: Option<&'a IndexMap<Symbol, NodeFieldId>>,
108    ) -> Result<CompileResult, CompileError> {
109        let mut compiler =
110            if let (Some(type_ids), Some(field_ids)) = (node_type_ids, node_field_ids) {
111                Compiler::builder(interner, type_ctx, symbol_table, strings)
112                    .linked(type_ids, field_ids)
113                    .build()
114            } else {
115                Compiler::builder(interner, type_ctx, symbol_table, strings).build()
116            };
117
118        // Emit universal preamble first: Obj -> Trampoline -> EndObj -> Return
119        // This wraps any entrypoint to create the top-level scope.
120        let preamble_entry = compiler.emit_preamble();
121
122        // Pre-allocate entry labels for all definitions
123        for (def_id, _) in type_ctx.iter_def_types() {
124            let label = compiler.fresh_label();
125            compiler.def_entries.insert(def_id, label);
126        }
127
128        // Compile each definition
129        for (def_id, _) in type_ctx.iter_def_types() {
130            compiler.compile_def(def_id)?;
131        }
132
133        Ok(CompileResult {
134            instructions: compiler.instructions,
135            def_entries: compiler.def_entries,
136            preamble_entry,
137        })
138    }
139
140    /// Emit the universal preamble: Obj -> Trampoline -> EndObj -> Return
141    ///
142    /// The preamble creates a scope for the entrypoint's captures.
143    /// The Trampoline instruction jumps to the actual entrypoint (set via VM context).
144    fn emit_preamble(&mut self) -> Label {
145        // Return (stack is empty after preamble, so this means Accept)
146        let return_label = self.fresh_label();
147        self.instructions.push(ReturnIR::new(return_label).into());
148
149        // Chain: Obj -> Trampoline -> EndObj -> Return
150        let endobj_label = self.emit_endobj_step(return_label);
151
152        let trampoline_label = self.fresh_label();
153        self.instructions
154            .push(TrampolineIR::new(trampoline_label, endobj_label).into());
155
156        self.emit_obj_step(trampoline_label)
157    }
158
159    /// Generate a fresh label.
160    pub(super) fn fresh_label(&mut self) -> Label {
161        let l = Label(self.next_label_id);
162        self.next_label_id += 1;
163        l
164    }
165
166    /// Compile a single definition.
167    fn compile_def(&mut self, def_id: DefId) -> Result<(), CompileError> {
168        let name_sym = self.type_ctx.def_name_sym(def_id);
169        let name = self.interner.resolve(name_sym);
170
171        let Some(body) = self.symbol_table.get(name) else {
172            return Err(CompileError::DefinitionNotFound(name.to_string()));
173        };
174
175        let entry_label = self.def_entries[&def_id];
176
177        // Create Return instruction at definition exit.
178        // When stack is empty, Return means Accept (top-level match completed).
179        // When stack is non-empty, Return pops frame and jumps to return address.
180        let return_label = self.fresh_label();
181        self.instructions.push(ReturnIR::new(return_label).into());
182
183        // Definition bodies use StayExact navigation: match at current position only.
184        // The caller (alternation, sequence, quantifier, or VM top-level) owns the search.
185        // This ensures named definition calls don't advance past positions that other
186        // alternation branches should try.
187        let body_nav = Some(Nav::StayExact);
188
189        // Definitions are compiled in normalized form: body -> Return
190        // No Obj/EndObj wrapper - that's the caller's responsibility (call-site scoping).
191        // We still use with_scope for member index lookup during compilation.
192        let body_entry = if let Some(type_id) = self.type_ctx.get_def_type(def_id) {
193            self.with_scope(type_id, |this| {
194                this.compile_expr_with_nav(body, return_label, body_nav)
195            })
196        } else {
197            self.compile_expr_with_nav(body, return_label, body_nav)
198        };
199
200        // If body_entry differs from our pre-allocated entry, emit an epsilon jump
201        if body_entry != entry_label {
202            self.emit_epsilon(entry_label, vec![body_entry]);
203        }
204
205        Ok(())
206    }
207
208    /// Compile an expression with an optional navigation override.
209    pub(super) fn compile_expr_with_nav(
210        &mut self,
211        expr: &Expr,
212        exit: Label,
213        nav_override: Option<Nav>,
214    ) -> Label {
215        self.compile_expr_inner(expr, exit, nav_override, CaptureEffects::default())
216    }
217
218    /// Compile an expression with navigation override and capture effects.
219    ///
220    /// Capture effects are propagated to the innermost match instruction:
221    /// - Named/Anonymous nodes: effects go on the match
222    /// - Sequences: effects go on last item
223    /// - Alternations: effects go on each branch
224    /// - Other wrappers: effects propagate through
225    pub(super) fn compile_expr_inner(
226        &mut self,
227        expr: &Expr,
228        exit: Label,
229        nav_override: Option<Nav>,
230        capture: CaptureEffects,
231    ) -> Label {
232        match expr {
233            // Leaf nodes: attach capture effects directly
234            Expr::NamedNode(n) => self.compile_named_node_inner(n, exit, nav_override, capture),
235            Expr::AnonymousNode(n) => {
236                self.compile_anonymous_node_inner(n, exit, nav_override, capture)
237            }
238            // Sequences: pass capture to last item
239            Expr::SeqExpr(s) => self.compile_seq_inner(s, exit, nav_override, capture),
240            // Alternations: pass capture to each branch
241            Expr::AltExpr(a) => self.compile_alt_inner(a, exit, nav_override, capture),
242            // Wrappers: propagate capture through
243            Expr::CapturedExpr(c) => self.compile_captured_inner(c, exit, nav_override, capture),
244            Expr::QuantifiedExpr(q) => {
245                self.compile_quantified_inner(q, exit, nav_override, capture)
246            }
247            Expr::FieldExpr(f) => self.compile_field_inner(f, exit, nav_override, capture),
248            // Refs: special handling for Call
249            Expr::Ref(r) => self.compile_ref_inner(r, exit, nav_override, None, capture),
250        }
251    }
252}