plotnik_compiler/compile/
compiler.rs

1//! Core compiler state and entry points.
2
3use std::cell::RefCell;
4
5use indexmap::IndexMap;
6use plotnik_core::{Interner, NodeFieldId, NodeTypeId, Symbol};
7
8use crate::analyze::symbol_table::SymbolTable;
9use crate::analyze::type_check::{DefId, TypeContext};
10use crate::bytecode::{InstructionIR, Label, ReturnIR, TrampolineIR};
11use crate::emit::StringTableBuilder;
12use crate::parser::Expr;
13use plotnik_bytecode::Nav;
14
15use super::capture::CaptureEffects;
16use super::dce::remove_unreachable;
17use super::epsilon_elim::eliminate_epsilons;
18use super::error::{CompileError, CompileResult};
19use super::scope::StructScope;
20use super::verify::debug_verify_ir_fingerprint;
21
/// Compilation context bundling all shared compilation state.
///
/// Uses `RefCell` for `strings` to allow interior mutability while
/// sharing the context across compilation phases.
pub struct CompileCtx<'a> {
    /// Symbol interner; used to resolve `Symbol`s back to source text
    /// (e.g. definition names in `compile_def`).
    pub interner: &'a Interner,
    /// Per-definition type information (`DefId` iteration, def types, names).
    pub type_ctx: &'a TypeContext,
    /// Lookup from definition name to its parsed body expression.
    pub symbol_table: &'a SymbolTable,
    /// Shared builder for the emitted string table; `RefCell` so multiple
    /// phases can append through a shared `&CompileCtx`.
    pub strings: &'a RefCell<StringTableBuilder>,
    /// Optional symbol -> grammar node-type id mapping.
    /// NOTE(review): presumably `None` when no grammar metadata is
    /// available — confirm against the context constructor.
    pub node_types: Option<&'a IndexMap<Symbol, NodeTypeId>>,
    /// Optional symbol -> grammar field id mapping (same availability
    /// caveat as `node_types`).
    pub node_fields: Option<&'a IndexMap<Symbol, NodeFieldId>>,
}
/// Compiler state for Thompson construction.
pub struct Compiler<'a> {
    /// Shared read-only compilation context (interner, types, symbols).
    pub(super) ctx: &'a CompileCtx<'a>,
    /// IR instructions accumulated so far, in emission order.
    pub(super) instructions: Vec<InstructionIR>,
    /// Counter backing `fresh_label`; label ids are allocated sequentially
    /// starting from 0, so emission order determines label values.
    pub(crate) next_label_id: u32,
    /// Pre-allocated entry label for each definition, keyed by `DefId`.
    /// Filled before bodies are compiled so any body can reference any
    /// definition's entry.
    pub(super) def_entries: IndexMap<DefId, Label>,
    /// Stack of active struct scopes for capture lookup.
    /// Innermost scope is at the end.
    pub(super) scope_stack: Vec<StructScope>,
}
45
impl<'a> Compiler<'a> {
    /// Create a new compiler with the given context.
    ///
    /// Starts empty: no instructions, label ids from 0, no definition
    /// entries, and an empty scope stack.
    pub fn new(ctx: &'a CompileCtx<'a>) -> Self {
        Self {
            ctx,
            instructions: Vec::new(),
            next_label_id: 0,
            def_entries: IndexMap::new(),
            scope_stack: Vec::new(),
        }
    }

    /// Compile all definitions in the query.
    ///
    /// Pipeline: emit the universal preamble, pre-allocate one entry label
    /// per definition, compile each definition body, then run epsilon
    /// elimination and dead-code removal over the combined result.
    ///
    /// # Errors
    /// Returns [`CompileError`] if any definition fails to compile
    /// (e.g. `DefinitionNotFound` from `compile_def`).
    pub fn compile(ctx: &'a CompileCtx<'a>) -> Result<CompileResult, CompileError> {
        let mut compiler = Compiler::new(ctx);

        // Emit universal preamble first: Obj -> Trampoline -> EndObj -> Return
        // This wraps any entrypoint to create the top-level scope.
        let preamble_entry = compiler.emit_preamble();

        // Pre-allocate entry labels for all definitions.
        // Done in a separate pass so that bodies compiled below can refer to
        // the entry label of any definition, regardless of compilation order.
        for (def_id, _) in ctx.type_ctx.iter_def_types() {
            let label = compiler.fresh_label();
            compiler.def_entries.insert(def_id, label);
        }

        // Compile each definition
        for (def_id, _) in ctx.type_ctx.iter_def_types() {
            compiler.compile_def(def_id)?;
        }

        let mut result = CompileResult {
            instructions: compiler.instructions,
            def_entries: compiler.def_entries,
            preamble_entry,
        };

        // Eliminate epsilon transitions (with semantic verification in debug builds)
        eliminate_epsilons(&mut result, ctx);

        // Remove unreachable instructions (bypassed epsilons, etc.)
        remove_unreachable(&mut result);

        Ok(result)
    }

    /// Emit the universal preamble: Obj -> Trampoline -> EndObj -> Return
    ///
    /// The preamble creates a scope for the entrypoint's captures.
    /// The Trampoline instruction jumps to the actual entrypoint (set via VM context).
    ///
    /// Returns the label of the first instruction in the chain (the Obj step).
    /// NOTE: instructions are emitted back-to-front (Return first) because
    /// each step needs the label of its successor; `fresh_label` order is
    /// therefore significant and must not be rearranged.
    fn emit_preamble(&mut self) -> Label {
        // Return (stack is empty after preamble, so this means Accept)
        let return_label = self.fresh_label();
        self.instructions.push(ReturnIR::new(return_label).into());

        // Chain: Obj -> Trampoline -> EndObj -> Return
        let endobj_label = self.emit_endobj_step(return_label);

        let trampoline_label = self.fresh_label();
        self.instructions
            .push(TrampolineIR::new(trampoline_label, endobj_label).into());

        self.emit_obj_step(trampoline_label)
    }

    /// Generate a fresh label.
    ///
    /// Labels are handed out sequentially from `next_label_id`; callers
    /// rely on this only for uniqueness, not for any particular value.
    pub(super) fn fresh_label(&mut self) -> Label {
        let l = Label(self.next_label_id);
        self.next_label_id += 1;
        l
    }

    /// Compile a single definition.
    ///
    /// Looks up the definition body by name in the symbol table, compiles
    /// it toward a fresh Return label, and wires the result up to the
    /// entry label pre-allocated in `compile`.
    ///
    /// # Errors
    /// Returns `CompileError::DefinitionNotFound` if the symbol table has
    /// no body for the definition's name.
    fn compile_def(&mut self, def_id: DefId) -> Result<(), CompileError> {
        let name_sym = self.ctx.type_ctx.def_name_sym(def_id);
        let name = self.ctx.interner.resolve(name_sym);

        let Some(body) = self.ctx.symbol_table.get(name) else {
            return Err(CompileError::DefinitionNotFound(name.to_string()));
        };

        let entry_label = self.def_entries[&def_id];

        // Create Return instruction at definition exit.
        // When stack is empty, Return means Accept (top-level match completed).
        // When stack is non-empty, Return pops frame and jumps to return address.
        let return_label = self.fresh_label();
        self.instructions.push(ReturnIR::new(return_label).into());

        // Definition bodies use StayExact navigation: match at current position only.
        // The caller (alternation, sequence, quantifier, or VM top-level) owns the search.
        // This ensures named definition calls don't advance past positions that other
        // alternation branches should try.
        let body_nav = Some(Nav::StayExact);

        // Definitions are compiled in normalized form: body -> Return
        // No Obj/EndObj wrapper - that's the caller's responsibility (call-site scoping).
        // We still use with_scope for member index lookup during compilation.
        let body_entry = if let Some(type_id) = self.ctx.type_ctx.get_def_type(def_id) {
            self.with_scope(type_id, |this| {
                this.compile_expr_with_nav(body, return_label, body_nav)
            })
        } else {
            // Untyped definition: compile without a struct scope.
            self.compile_expr_with_nav(body, return_label, body_nav)
        };

        // If body_entry differs from our pre-allocated entry, emit an epsilon jump
        // so callers who targeted entry_label still reach the real body.
        if body_entry != entry_label {
            self.emit_epsilon(entry_label, vec![body_entry]);
        }

        // Debug-only: verify IR semantic fingerprint
        debug_verify_ir_fingerprint(
            &self.instructions,
            entry_label,
            &self.def_entries,
            name,
            self.ctx,
        );

        Ok(())
    }

    /// Compile an expression with an optional navigation override.
    ///
    /// Thin wrapper over [`Self::compile_expr_inner`] with no capture
    /// effects attached. Returns the entry label of the compiled fragment.
    pub(super) fn compile_expr_with_nav(
        &mut self,
        expr: &Expr,
        exit: Label,
        nav_override: Option<Nav>,
    ) -> Label {
        self.compile_expr_inner(expr, exit, nav_override, CaptureEffects::default())
    }

    /// Compile an expression with navigation override and capture effects.
    ///
    /// Capture effects are propagated to the innermost match instruction:
    /// - Named/Anonymous nodes: effects go on the match
    /// - Sequences: effects go on last item
    /// - Alternations: effects go on each branch
    /// - Other wrappers: effects propagate through
    ///
    /// Returns the entry label of the compiled fragment; control flows to
    /// `exit` when the fragment matches.
    pub(super) fn compile_expr_inner(
        &mut self,
        expr: &Expr,
        exit: Label,
        nav_override: Option<Nav>,
        capture: CaptureEffects,
    ) -> Label {
        // Exhaustive dispatch over expression kinds; each arm delegates to
        // a specialized `compile_*_inner` helper (defined in sibling modules).
        match expr {
            // Leaf nodes: attach capture effects directly
            Expr::NamedNode(n) => self.compile_named_node_inner(n, exit, nav_override, capture),
            Expr::AnonymousNode(n) => {
                self.compile_anonymous_node_inner(n, exit, nav_override, capture)
            }
            // Sequences: pass capture to last item
            Expr::SeqExpr(s) => self.compile_seq_inner(s, exit, nav_override, capture),
            // Alternations: pass capture to each branch
            Expr::AltExpr(a) => self.compile_alt_inner(a, exit, nav_override, capture),
            // Wrappers: propagate capture through
            Expr::CapturedExpr(c) => self.compile_captured_inner(c, exit, nav_override, capture),
            Expr::QuantifiedExpr(q) => {
                self.compile_quantified_inner(q, exit, nav_override, capture)
            }
            Expr::FieldExpr(f) => self.compile_field_inner(f, exit, nav_override, capture),
            // Refs: special handling for Call (extra `None` argument;
            // NOTE(review): its meaning lives in compile_ref_inner — confirm there)
            Expr::Ref(r) => self.compile_ref_inner(r, exit, nav_override, None, capture),
        }
    }
}
213}