plotnik_compiler/compile/
compiler.rs

1//! Core compiler state and entry points.
2
3use std::cell::RefCell;
4
5use indexmap::IndexMap;
6use plotnik_core::{Interner, NodeFieldId, NodeTypeId, Symbol};
7
8use crate::analyze::symbol_table::SymbolTable;
9use crate::analyze::type_check::{DefId, TypeContext};
10use crate::bytecode::{InstructionIR, Label, ReturnIR, TrampolineIR};
11use crate::emit::StringTableBuilder;
12use crate::parser::Expr;
13use plotnik_bytecode::Nav;
14
15use super::capture::CaptureEffects;
16use super::collapse_prefix::collapse_prefix;
17use super::collapse_up::collapse_up;
18use super::dce::remove_unreachable;
19use super::epsilon_elim::eliminate_epsilons;
20use super::lower::lower;
21use super::error::{CompileError, CompileResult};
22use super::scope::StructScope;
23use super::verify::debug_verify_ir_fingerprint;
24
/// Compilation context bundling all shared compilation state.
///
/// Uses `RefCell` for `strings` to allow interior mutability while
/// sharing the context across compilation phases.
pub struct CompileCtx<'a> {
    /// Interner used to resolve interned symbols back to `&str` names.
    pub interner: &'a Interner,
    /// Type-check results: definition ids, their names, and their types.
    pub type_ctx: &'a TypeContext,
    /// Maps definition names to their parsed bodies (`Expr`).
    pub symbol_table: &'a SymbolTable,
    /// Shared string-table builder; `RefCell` permits mutation through `&self`.
    pub strings: &'a RefCell<StringTableBuilder>,
    /// Optional symbol -> grammar node-type id mapping.
    /// NOTE(review): not used in this chunk — presumably consumed when
    /// compiling node matches; confirm against the node-compilation code.
    pub node_types: Option<&'a IndexMap<Symbol, NodeTypeId>>,
    /// Optional symbol -> grammar field id mapping.
    /// NOTE(review): not used in this chunk — presumably consumed by field
    /// expression compilation; confirm against `compile_field_inner`.
    pub node_fields: Option<&'a IndexMap<Symbol, NodeFieldId>>,
}
37
/// Compiler state for Thompson construction.
pub struct Compiler<'a> {
    /// Shared, read-mostly compilation context.
    pub(super) ctx: &'a CompileCtx<'a>,
    /// Instruction stream accumulated while compiling definitions.
    pub(super) instructions: Vec<InstructionIR>,
    /// Next unassigned label id; advanced by `fresh_label`.
    pub(crate) next_label_id: u32,
    /// Entry label for each definition, pre-allocated before bodies compile
    /// so definition-to-definition references can resolve.
    pub(super) def_entries: IndexMap<DefId, Label>,
    /// Stack of active struct scopes for capture lookup.
    /// Innermost scope is at the end.
    pub(super) scope_stack: Vec<StructScope>,
}
48
49impl<'a> Compiler<'a> {
50    /// Create a new compiler with the given context.
51    pub fn new(ctx: &'a CompileCtx<'a>) -> Self {
52        Self {
53            ctx,
54            instructions: Vec::new(),
55            next_label_id: 0,
56            def_entries: IndexMap::new(),
57            scope_stack: Vec::new(),
58        }
59    }
60
61    /// Compile all definitions in the query.
62    pub fn compile(ctx: &'a CompileCtx<'a>) -> Result<CompileResult, CompileError> {
63        let mut compiler = Compiler::new(ctx);
64
65        // Emit universal preamble first: Obj -> Trampoline -> EndObj -> Return
66        // This wraps any entrypoint to create the top-level scope.
67        let preamble_entry = compiler.emit_preamble();
68
69        // Pre-allocate entry labels for all definitions
70        for (def_id, _) in ctx.type_ctx.iter_def_types() {
71            let label = compiler.fresh_label();
72            compiler.def_entries.insert(def_id, label);
73        }
74
75        // Compile each definition
76        for (def_id, _) in ctx.type_ctx.iter_def_types() {
77            compiler.compile_def(def_id)?;
78        }
79
80        let mut result = CompileResult {
81            instructions: compiler.instructions,
82            def_entries: compiler.def_entries,
83            preamble_entry,
84        };
85
86        // Eliminate epsilon transitions (with semantic verification in debug builds)
87        eliminate_epsilons(&mut result, ctx);
88
89        // Remove unreachable instructions (bypassed epsilons, etc.)
90        remove_unreachable(&mut result);
91
92        // Collapse structurally identical successor instructions
93        collapse_prefix(&mut result);
94
95        // Collapse consecutive Up instructions of the same mode
96        collapse_up(&mut result);
97
98        // Lower to bytecode-compatible form (cascade overflows)
99        lower(&mut result);
100
101        Ok(result)
102    }
103
104    /// Emit the universal preamble: Obj -> Trampoline -> EndObj -> Return
105    ///
106    /// The preamble creates a scope for the entrypoint's captures.
107    /// The Trampoline instruction jumps to the actual entrypoint (set via VM context).
108    fn emit_preamble(&mut self) -> Label {
109        // Return (stack is empty after preamble, so this means Accept)
110        let return_label = self.fresh_label();
111        self.instructions.push(ReturnIR::new(return_label).into());
112
113        // Chain: Obj -> Trampoline -> EndObj -> Return
114        let endobj_label = self.emit_endobj_step(return_label);
115
116        let trampoline_label = self.fresh_label();
117        self.instructions
118            .push(TrampolineIR::new(trampoline_label, endobj_label).into());
119
120        self.emit_obj_step(trampoline_label)
121    }
122
123    /// Generate a fresh label.
124    pub(super) fn fresh_label(&mut self) -> Label {
125        let l = Label(self.next_label_id);
126        self.next_label_id += 1;
127        l
128    }
129
130    /// Compile a single definition.
131    fn compile_def(&mut self, def_id: DefId) -> Result<(), CompileError> {
132        let name_sym = self.ctx.type_ctx.def_name_sym(def_id);
133        let name = self.ctx.interner.resolve(name_sym);
134
135        let Some(body) = self.ctx.symbol_table.get(name) else {
136            return Err(CompileError::DefinitionNotFound(name.to_string()));
137        };
138
139        let entry_label = self.def_entries[&def_id];
140
141        // Create Return instruction at definition exit.
142        // When stack is empty, Return means Accept (top-level match completed).
143        // When stack is non-empty, Return pops frame and jumps to return address.
144        let return_label = self.fresh_label();
145        self.instructions.push(ReturnIR::new(return_label).into());
146
147        // Definition bodies use StayExact navigation: match at current position only.
148        // The caller (alternation, sequence, quantifier, or VM top-level) owns the search.
149        // This ensures named definition calls don't advance past positions that other
150        // alternation branches should try.
151        let body_nav = Some(Nav::StayExact);
152
153        // Definitions are compiled in normalized form: body -> Return
154        // No Obj/EndObj wrapper - that's the caller's responsibility (call-site scoping).
155        // We still use with_scope for member index lookup during compilation.
156        let body_entry = if let Some(type_id) = self.ctx.type_ctx.get_def_type(def_id) {
157            self.with_scope(type_id, |this| {
158                this.compile_expr_with_nav(body, return_label, body_nav)
159            })
160        } else {
161            self.compile_expr_with_nav(body, return_label, body_nav)
162        };
163
164        // If body_entry differs from our pre-allocated entry, emit an epsilon jump
165        if body_entry != entry_label {
166            self.emit_epsilon(entry_label, vec![body_entry]);
167        }
168
169        // Debug-only: verify IR semantic fingerprint
170        debug_verify_ir_fingerprint(
171            &self.instructions,
172            entry_label,
173            &self.def_entries,
174            name,
175            self.ctx,
176        );
177
178        Ok(())
179    }
180
181    /// Compile an expression with an optional navigation override.
182    pub(super) fn compile_expr_with_nav(
183        &mut self,
184        expr: &Expr,
185        exit: Label,
186        nav_override: Option<Nav>,
187    ) -> Label {
188        self.compile_expr_inner(expr, exit, nav_override, CaptureEffects::default())
189    }
190
191    /// Compile an expression with navigation override and capture effects.
192    ///
193    /// Capture effects are propagated to the innermost match instruction:
194    /// - Named/Anonymous nodes: effects go on the match
195    /// - Sequences: effects go on last item
196    /// - Alternations: effects go on each branch
197    /// - Other wrappers: effects propagate through
198    pub(super) fn compile_expr_inner(
199        &mut self,
200        expr: &Expr,
201        exit: Label,
202        nav_override: Option<Nav>,
203        capture: CaptureEffects,
204    ) -> Label {
205        match expr {
206            // Leaf nodes: attach capture effects directly
207            Expr::NamedNode(n) => self.compile_named_node_inner(n, exit, nav_override, capture),
208            Expr::AnonymousNode(n) => {
209                self.compile_anonymous_node_inner(n, exit, nav_override, capture)
210            }
211            // Sequences: pass capture to last item
212            Expr::SeqExpr(s) => self.compile_seq_inner(s, exit, nav_override, capture),
213            // Alternations: pass capture to each branch
214            Expr::AltExpr(a) => self.compile_alt_inner(a, exit, nav_override, capture),
215            // Wrappers: propagate capture through
216            Expr::CapturedExpr(c) => self.compile_captured_inner(c, exit, nav_override, capture),
217            Expr::QuantifiedExpr(q) => {
218                self.compile_quantified_inner(q, exit, nav_override, capture)
219            }
220            Expr::FieldExpr(f) => self.compile_field_inner(f, exit, nav_override, capture),
221            // Refs: special handling for Call
222            Expr::Ref(r) => self.compile_ref_inner(r, exit, nav_override, None, capture),
223        }
224    }
225}