Skip to main content

harn_vm/compiler/
mod.rs

1use harn_parser::{Node, SNode, TypeExpr};
2
3mod closures;
4mod concurrency;
5mod decls;
6mod error;
7mod error_handling;
8mod expressions;
9mod patterns;
10mod pipe;
11mod state;
12mod statements;
13#[cfg(test)]
14mod tests;
15mod type_facts;
16mod yield_scan;
17
18pub use error::CompileError;
19
20use crate::chunk::{Chunk, Constant, Op};
21
22/// Look through an `AttributedDecl` wrapper to the inner declaration.
23/// `compile_named` / `compile` use this so attributed declarations like
24/// `@test pipeline foo(...)` are still discoverable by name.
25fn peel_node(sn: &SNode) -> &Node {
26    match &sn.node {
27        Node::AttributedDecl { inner, .. } => &inner.node,
28        other => other,
29    }
30}
31
32/// Entry in the compiler's pending-finally stack. See the field-level doc on
33/// `Compiler::finally_bodies` for the unwind semantics each variant encodes.
34#[derive(Clone, Debug)]
35enum FinallyEntry {
36    Finally(Vec<SNode>),
37    CatchBarrier,
38}
39
/// Bookkeeping for one enclosing loop, consulted when lowering `break` and
/// `continue`.
///
/// Derives `Debug` like the other compiler-internal helper types
/// (`FinallyEntry`, `LocalBinding`) so the loop stack can be inspected in
/// diagnostics and test failures.
#[derive(Debug)]
struct LoopContext {
    /// Offset of the loop start (the `continue` target).
    start_offset: usize,
    /// Positions of `break` jumps that still need patching to the loop end.
    break_patches: Vec<usize>,
    /// True if this is a for-in loop (has an iterator to clean up on break).
    has_iterator: bool,
    /// Number of exception handlers active at loop entry.
    handler_depth: usize,
    /// Number of pending finally bodies at loop entry.
    finally_depth: usize,
    /// Lexical scope depth at loop entry.
    scope_depth: usize,
}
55
/// Slot assignment recorded for a lexical variable in the compiler's
/// `local_scopes` maps.
#[derive(Debug, Clone, Copy)]
struct LocalBinding {
    /// Slot index assigned to the variable.
    slot: u16,
    /// Mutability flag recorded for the binding.
    mutable: bool,
}
61
62/// Compiles an AST into bytecode.
63pub struct Compiler {
64    chunk: Chunk,
65    line: u32,
66    column: u32,
67    /// Track enum type names so PropertyAccess on them can produce EnumVariant.
68    enum_names: std::collections::HashSet<String>,
69    /// Track struct type names to declared field order for indexed instances.
70    struct_layouts: std::collections::HashMap<String, Vec<String>>,
71    /// Track interface names → method names for runtime enforcement.
72    interface_methods: std::collections::HashMap<String, Vec<String>>,
73    /// Stack of active loop contexts for break/continue.
74    loop_stack: Vec<LoopContext>,
75    /// Current depth of exception handlers (for cleanup on break/continue).
76    handler_depth: usize,
77    /// Stack of pending finally bodies plus catch-handler barriers for
78    /// unwind-aware lowering of `throw`, `return`, `break`, and `continue`.
79    ///
80    /// A `Finally` entry is a pending finally body that must execute when
81    /// control exits its enclosing try block. A `CatchBarrier` marks the
82    /// boundary of an active `try/catch` handler: throws emitted inside
83    /// the try body are caught locally, so pre-running finallys *beyond*
84    /// the barrier would wrongly fire side effects for outer blocks the
85    /// throw never actually escapes. Throw lowering stops at the innermost
86    /// barrier; `return`/`break`/`continue`, which do transfer past local
87    /// handlers, still run every pending `Finally` up to their target.
88    finally_bodies: Vec<FinallyEntry>,
89    /// Counter for unique temp variable names.
90    temp_counter: usize,
91    /// Number of lexical block scopes currently active in this compiled frame.
92    scope_depth: usize,
93    /// Top-level `type` aliases, used to lower `schema_of(T)` and
94    /// `output_schema: T` into constant JSON-Schema dicts at compile time.
95    type_aliases: std::collections::HashMap<String, TypeExpr>,
96    /// Lightweight compiler-side type facts used only for conservative
97    /// bytecode specialization. This mirrors lexical scopes and is separate
98    /// from the parser's diagnostic type checker so compile-only callers keep
99    /// working without a required type-check pass.
100    type_scopes: Vec<std::collections::HashMap<String, TypeExpr>>,
101    /// Lexical variable slots for the current compiled frame. The compiler
102    /// only consults this for names declared inside the current function-like
103    /// body; all unresolved names stay on the existing dynamic/name path.
104    local_scopes: Vec<std::collections::HashMap<String, LocalBinding>>,
105    /// True when this compiler is emitting code outside any function-like
106    /// scope (module top-level statements). `try*` is rejected here
107    /// because the rethrow has no enclosing function to live in.
108    /// Pipeline bodies and nested `Compiler::new()` instances (fn,
109    /// closure, tool, etc.) flip this to false before compiling.
110    module_level: bool,
111}
112
113impl Compiler {
114    /// Compile a single AST node. Most arm bodies live in per-category
115    /// submodules (expressions, statements, closures, decls, patterns,
116    /// error_handling, concurrency); this function is a thin dispatcher.
117    fn compile_node(&mut self, snode: &SNode) -> Result<(), CompileError> {
118        self.line = snode.span.line as u32;
119        self.column = snode.span.column as u32;
120        self.chunk.set_column(self.column);
121        match &snode.node {
122            Node::IntLiteral(n) => {
123                let idx = self.chunk.add_constant(Constant::Int(*n));
124                self.chunk.emit_u16(Op::Constant, idx, self.line);
125            }
126            Node::FloatLiteral(n) => {
127                let idx = self.chunk.add_constant(Constant::Float(*n));
128                self.chunk.emit_u16(Op::Constant, idx, self.line);
129            }
130            Node::StringLiteral(s) | Node::RawStringLiteral(s) => {
131                let idx = self.chunk.add_constant(Constant::String(s.clone()));
132                self.chunk.emit_u16(Op::Constant, idx, self.line);
133            }
134            Node::BoolLiteral(true) => self.chunk.emit(Op::True, self.line),
135            Node::BoolLiteral(false) => self.chunk.emit(Op::False, self.line),
136            Node::NilLiteral => self.chunk.emit(Op::Nil, self.line),
137            Node::DurationLiteral(ms) => {
138                let ms = i64::try_from(*ms).map_err(|_| CompileError {
139                    message: "duration literal is too large".to_string(),
140                    line: self.line,
141                })?;
142                let idx = self.chunk.add_constant(Constant::Duration(ms));
143                self.chunk.emit_u16(Op::Constant, idx, self.line);
144            }
145            Node::Identifier(name) => {
146                self.emit_get_binding(name);
147            }
148            Node::LetBinding { pattern, value, .. } => {
149                let binding_type = match &snode.node {
150                    Node::LetBinding {
151                        type_ann: Some(type_ann),
152                        ..
153                    } => Some(type_ann.clone()),
154                    _ => self.infer_expr_type(value),
155                };
156                self.compile_node(value)?;
157                self.compile_destructuring(pattern, false)?;
158                self.record_binding_type(pattern, binding_type);
159            }
160            Node::VarBinding { pattern, value, .. } => {
161                let binding_type = match &snode.node {
162                    Node::VarBinding {
163                        type_ann: Some(type_ann),
164                        ..
165                    } => Some(type_ann.clone()),
166                    _ => self.infer_expr_type(value),
167                };
168                self.compile_node(value)?;
169                self.compile_destructuring(pattern, true)?;
170                self.record_binding_type(pattern, binding_type);
171            }
172            Node::Assignment {
173                target, value, op, ..
174            } => {
175                self.compile_assignment(target, value, op)?;
176            }
177            Node::BinaryOp { op, left, right } => {
178                self.compile_binary_op(op, left, right)?;
179            }
180            Node::UnaryOp { op, operand } => {
181                self.compile_node(operand)?;
182                match op.as_str() {
183                    "-" => self.chunk.emit(Op::Negate, self.line),
184                    "!" => self.chunk.emit(Op::Not, self.line),
185                    _ => {}
186                }
187            }
188            Node::Ternary {
189                condition,
190                true_expr,
191                false_expr,
192            } => {
193                self.compile_node(condition)?;
194                let else_jump = self.chunk.emit_jump(Op::JumpIfFalse, self.line);
195                self.chunk.emit(Op::Pop, self.line);
196                self.compile_node(true_expr)?;
197                let end_jump = self.chunk.emit_jump(Op::Jump, self.line);
198                self.chunk.patch_jump(else_jump);
199                self.chunk.emit(Op::Pop, self.line);
200                self.compile_node(false_expr)?;
201                self.chunk.patch_jump(end_jump);
202            }
203            Node::FunctionCall { name, args } => {
204                self.compile_function_call(name, args)?;
205            }
206            Node::MethodCall {
207                object,
208                method,
209                args,
210            } => {
211                self.compile_method_call(object, method, args)?;
212            }
213            Node::OptionalMethodCall {
214                object,
215                method,
216                args,
217            } => {
218                self.compile_node(object)?;
219                for arg in args {
220                    self.compile_node(arg)?;
221                }
222                let name_idx = self.chunk.add_constant(Constant::String(method.clone()));
223                self.chunk
224                    .emit_method_call_opt(name_idx, args.len() as u8, self.line);
225            }
226            Node::PropertyAccess { object, property } => {
227                self.compile_property_access(object, property)?;
228            }
229            Node::OptionalPropertyAccess { object, property } => {
230                self.compile_node(object)?;
231                let idx = self.chunk.add_constant(Constant::String(property.clone()));
232                self.chunk.emit_u16(Op::GetPropertyOpt, idx, self.line);
233            }
234            Node::SubscriptAccess { object, index } => {
235                self.compile_node(object)?;
236                self.compile_node(index)?;
237                self.chunk.emit(Op::Subscript, self.line);
238            }
239            Node::OptionalSubscriptAccess { object, index } => {
240                self.compile_node(object)?;
241                self.compile_node(index)?;
242                self.chunk.emit(Op::SubscriptOpt, self.line);
243            }
244            Node::SliceAccess { object, start, end } => {
245                self.compile_node(object)?;
246                if let Some(s) = start {
247                    self.compile_node(s)?;
248                } else {
249                    self.chunk.emit(Op::Nil, self.line);
250                }
251                if let Some(e) = end {
252                    self.compile_node(e)?;
253                } else {
254                    self.chunk.emit(Op::Nil, self.line);
255                }
256                self.chunk.emit(Op::Slice, self.line);
257            }
258            Node::IfElse {
259                condition,
260                then_body,
261                else_body,
262            } => {
263                self.compile_if_else(condition, then_body, else_body)?;
264            }
265            Node::WhileLoop { condition, body } => {
266                self.compile_while_loop(condition, body)?;
267            }
268            Node::ForIn {
269                pattern,
270                iterable,
271                body,
272            } => {
273                self.compile_for_in(pattern, iterable, body)?;
274            }
275            Node::ReturnStmt { value } => {
276                self.compile_return_stmt(value)?;
277            }
278            Node::BreakStmt => {
279                self.compile_break_stmt()?;
280            }
281            Node::ContinueStmt => {
282                self.compile_continue_stmt()?;
283            }
284            Node::ListLiteral(elements) => {
285                self.compile_list_literal(elements)?;
286            }
287            Node::DictLiteral(entries) => {
288                self.compile_dict_literal(entries)?;
289            }
290            Node::InterpolatedString(segments) => {
291                self.compile_interpolated_string(segments)?;
292            }
293            Node::FnDecl {
294                name,
295                params,
296                body,
297                is_stream,
298                ..
299            } => {
300                self.compile_fn_decl(name, params, body, *is_stream)?;
301            }
302            Node::ToolDecl {
303                name,
304                description,
305                params,
306                return_type,
307                body,
308                ..
309            } => {
310                self.compile_tool_decl(name, description, params, return_type, body)?;
311            }
312            Node::SkillDecl { name, fields, .. } => {
313                self.compile_skill_decl(name, fields)?;
314            }
315            Node::Closure { params, body, .. } => {
316                self.compile_closure(params, body)?;
317            }
318            Node::ThrowStmt { value } => {
319                self.compile_throw_stmt(value)?;
320            }
321            Node::MatchExpr { value, arms } => {
322                self.compile_match_expr(value, arms)?;
323            }
324            Node::RangeExpr {
325                start,
326                end,
327                inclusive,
328            } => {
329                let name_idx = self
330                    .chunk
331                    .add_constant(Constant::String("__range__".to_string()));
332                self.chunk.emit_u16(Op::Constant, name_idx, self.line);
333                self.compile_node(start)?;
334                self.compile_node(end)?;
335                if *inclusive {
336                    self.chunk.emit(Op::True, self.line);
337                } else {
338                    self.chunk.emit(Op::False, self.line);
339                }
340                self.chunk.emit_u8(Op::Call, 3, self.line);
341            }
342            Node::GuardStmt {
343                condition,
344                else_body,
345            } => {
346                self.compile_guard_stmt(condition, else_body)?;
347            }
348            Node::RequireStmt { condition, message } => {
349                self.compile_node(condition)?;
350                let ok_jump = self.chunk.emit_jump(Op::JumpIfTrue, self.line);
351                self.chunk.emit(Op::Pop, self.line);
352                if let Some(message) = message {
353                    self.compile_node(message)?;
354                } else {
355                    let idx = self
356                        .chunk
357                        .add_constant(Constant::String("require condition failed".to_string()));
358                    self.chunk.emit_u16(Op::Constant, idx, self.line);
359                }
360                self.chunk.emit(Op::Throw, self.line);
361                self.chunk.patch_jump(ok_jump);
362                self.chunk.emit(Op::Pop, self.line);
363            }
364            Node::Block(stmts) => {
365                self.compile_scoped_block(stmts)?;
366            }
367            Node::DeadlineBlock { duration, body } => {
368                self.compile_node(duration)?;
369                self.chunk.emit(Op::DeadlineSetup, self.line);
370                self.compile_scoped_block(body)?;
371                self.chunk.emit(Op::DeadlineEnd, self.line);
372            }
373            Node::MutexBlock { body } => {
374                self.begin_scope();
375                let key_idx = self
376                    .chunk
377                    .add_constant(Constant::String("__default__".to_string()));
378                self.chunk.emit_u16(Op::SyncMutexEnter, key_idx, self.line);
379                for sn in body {
380                    self.compile_node(sn)?;
381                    if Self::produces_value(&sn.node) {
382                        self.chunk.emit(Op::Pop, self.line);
383                    }
384                }
385                self.chunk.emit(Op::Nil, self.line);
386                self.end_scope();
387            }
388            Node::DeferStmt { body } => {
389                // Push onto the finally stack so it runs on return/throw/scope-exit.
390                self.finally_bodies
391                    .push(FinallyEntry::Finally(body.clone()));
392                self.chunk.emit(Op::Nil, self.line);
393            }
394            Node::YieldExpr { value } => {
395                if let Some(val) = value {
396                    self.compile_node(val)?;
397                } else {
398                    self.chunk.emit(Op::Nil, self.line);
399                }
400                self.chunk.emit(Op::Yield, self.line);
401            }
402            Node::EmitExpr { value } => {
403                self.compile_node(value)?;
404                self.chunk.emit(Op::Yield, self.line);
405            }
406            Node::EnumConstruct {
407                enum_name,
408                variant,
409                args,
410            } => {
411                self.compile_enum_construct(enum_name, variant, args)?;
412            }
413            Node::StructConstruct {
414                struct_name,
415                fields,
416            } => {
417                self.compile_struct_construct(struct_name, fields)?;
418            }
419            Node::ImportDecl { path, .. } => {
420                let idx = self.chunk.add_constant(Constant::String(path.clone()));
421                self.chunk.emit_u16(Op::Import, idx, self.line);
422            }
423            Node::SelectiveImport { names, path, .. } => {
424                let path_idx = self.chunk.add_constant(Constant::String(path.clone()));
425                let names_str = names.join(",");
426                let names_idx = self.chunk.add_constant(Constant::String(names_str));
427                self.chunk
428                    .emit_u16(Op::SelectiveImport, path_idx, self.line);
429                let hi = (names_idx >> 8) as u8;
430                let lo = names_idx as u8;
431                self.chunk.code.push(hi);
432                self.chunk.code.push(lo);
433                self.chunk.lines.push(self.line);
434                self.chunk.columns.push(self.column);
435                self.chunk.lines.push(self.line);
436                self.chunk.columns.push(self.column);
437            }
438            Node::TryOperator { operand } => {
439                self.compile_node(operand)?;
440                self.chunk.emit(Op::TryUnwrap, self.line);
441            }
442            // `try* EXPR`: evaluate EXPR; on throw, run pending finally
443            // blocks up to the innermost catch barrier and rethrow the
444            // original value. On success, leave EXPR's value on the stack.
445            //
446            // Per the issue-#26 desugaring:
447            //   { let _r = try { EXPR }
448            //     guard is_ok(_r) else { throw unwrap_err(_r) }
449            //     unwrap(_r) }
450            //
451            // The bytecode realizes this directly: install a try handler
452            // around EXPR so a throw lands in our catch path, where we
453            // pre-run pending finallys and re-emit `Throw`. Skipping the
454            // intermediate Result.Ok/Err wrapping that `TryExpr` does
455            // keeps the success path a no-op (operand value passes through
456            // as-is).
457            Node::TryStar { operand } => {
458                self.compile_try_star(operand)?;
459            }
460            Node::ImplBlock { type_name, methods } => {
461                self.compile_impl_block(type_name, methods)?;
462            }
463            Node::StructDecl { name, fields, .. } => {
464                self.compile_struct_decl(name, fields)?;
465            }
466            // Metadata-only declarations (no runtime effect).
467            Node::Pipeline { .. }
468            | Node::OverrideDecl { .. }
469            | Node::TypeDecl { .. }
470            | Node::EnumDecl { .. }
471            | Node::InterfaceDecl { .. } => {
472                self.chunk.emit(Op::Nil, self.line);
473            }
474            Node::TryCatch {
475                body,
476                error_var,
477                error_type,
478                catch_body,
479                finally_body,
480            } => {
481                self.compile_try_catch(body, error_var, error_type, catch_body, finally_body)?;
482            }
483            Node::TryExpr { body } => {
484                self.compile_try_expr(body)?;
485            }
486            Node::Retry { count, body } => {
487                self.compile_retry(count, body)?;
488            }
489            Node::Parallel {
490                mode,
491                expr,
492                variable,
493                body,
494                options,
495            } => {
496                self.compile_parallel(mode, expr, variable, body, options)?;
497            }
498            Node::SpawnExpr { body } => {
499                self.compile_spawn_expr(body)?;
500            }
501            Node::SelectExpr {
502                cases,
503                timeout,
504                default_body,
505            } => {
506                self.compile_select_expr(cases, timeout, default_body)?;
507            }
508            Node::Spread(_) => {
509                return Err(CompileError {
510                    message: "spread (...) can only be used inside list literals, dict literals, or function call arguments".into(),
511                    line: self.line,
512                });
513            }
514            Node::AttributedDecl { attributes, inner } => {
515                self.compile_attributed_decl(attributes, inner)?;
516            }
517            Node::OrPattern(_) => {
518                return Err(CompileError {
519                    message: "or-pattern (|) can only appear as a match arm pattern".into(),
520                    line: self.line,
521                });
522            }
523        }
524        Ok(())
525    }
526}