Skip to main content

harn_vm/compiler/
mod.rs

1use harn_parser::{Node, SNode, TypeExpr};
2
3mod closures;
4mod concurrency;
5mod decls;
6mod error;
7mod error_handling;
8mod expressions;
9mod hitl;
10mod optimizer;
11mod patterns;
12mod pipe;
13mod state;
14mod statements;
15#[cfg(test)]
16mod tests;
17mod type_facts;
18mod yield_scan;
19
20pub use error::CompileError;
21
22use crate::chunk::{Chunk, Constant, Op};
23
/// Environment variable that disables optional compiler optimizations.
///
/// The VM still emits structurally required bytecode, such as parameter
/// slots, but skips semantic-preserving optimizer passes. This gives tests
/// and benchmarks a stable optimized-vs-unoptimized comparison switch.
///
/// Only the variable's presence matters: `CompilerOptions::from_env` checks
/// whether it is set at all, so any value (including an empty string or
/// `0`) disables optimizations.
pub const HARN_DISABLE_OPTIMIZATIONS_ENV: &str = "HARN_DISABLE_OPTIMIZATIONS";
30
/// Controls semantic-preserving compiler optimizations.
///
/// Build one with `CompilerOptions::optimized`,
/// `CompilerOptions::without_optimizations`, or `CompilerOptions::from_env`;
/// the `Default` value has optimizations enabled.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct CompilerOptions {
    /// `true` when the optional optimizer passes should run.
    optimize: bool,
}
36
37impl CompilerOptions {
38    pub fn optimized() -> Self {
39        Self { optimize: true }
40    }
41
42    pub fn without_optimizations() -> Self {
43        Self { optimize: false }
44    }
45
46    pub fn from_env() -> Self {
47        if std::env::var_os(HARN_DISABLE_OPTIMIZATIONS_ENV).is_some() {
48            Self::without_optimizations()
49        } else {
50            Self::optimized()
51        }
52    }
53
54    pub fn optimizations_enabled(self) -> bool {
55        self.optimize
56    }
57}
58
59impl Default for CompilerOptions {
60    fn default() -> Self {
61        Self::optimized()
62    }
63}
64
65/// Look through an `AttributedDecl` wrapper to the inner declaration.
66/// `compile_named` / `compile` use this so attributed declarations like
67/// `@test pipeline foo(...)` are still discoverable by name.
68fn peel_node(sn: &SNode) -> &Node {
69    match &sn.node {
70        Node::AttributedDecl { inner, .. } => &inner.node,
71        other => other,
72    }
73}
74
/// Entry in the compiler's pending-finally stack. See the field-level doc on
/// `Compiler::finally_bodies` for the unwind semantics each variant encodes.
#[derive(Clone, Debug)]
enum FinallyEntry {
    /// Statements of a pending finally body that must run when control
    /// leaves its enclosing block. `defer` bodies are registered through
    /// this variant as well.
    Finally(Vec<SNode>),
    /// Boundary of an active `try/catch` handler. Throw lowering stops
    /// pre-running finally bodies at the innermost barrier.
    CatchBarrier,
}
82
/// Tracks loop context for break/continue compilation.
///
/// One entry per active loop is kept on `Compiler::loop_stack`. The depth
/// fields are snapshots taken at loop entry.
struct LoopContext {
    /// Offset of the loop start (for continue).
    start_offset: usize,
    /// Positions of break jumps that need patching to the loop end.
    break_patches: Vec<usize>,
    /// True if this is a for-in loop (has an iterator to clean up on break).
    has_iterator: bool,
    /// Number of exception handlers active at loop entry.
    handler_depth: usize,
    /// Number of pending finally bodies at loop entry.
    finally_depth: usize,
    /// Lexical scope depth at loop entry.
    scope_depth: usize,
}
98
/// A name resolved to a lexical variable slot in the current compiled frame.
/// Stored per name in `Compiler::local_scopes`.
#[derive(Clone, Copy, Debug)]
struct LocalBinding {
    /// Slot index assigned to the variable.
    slot: u16,
    /// Whether the binding may be reassigned. NOTE(review): presumably `true`
    /// for `var` and `false` for `let`/`const`; confirm where bindings are
    /// created.
    mutable: bool,
}
104
/// Compiles an AST into bytecode.
pub struct Compiler {
    /// Optimization switches; consulted before constant folding in
    /// `compile_node`.
    options: CompilerOptions,
    /// Bytecode chunk currently being emitted into.
    chunk: Chunk,
    /// Source line of the node most recently entered by `compile_node`;
    /// attached to emitted instructions and compile errors.
    line: u32,
    /// Source column of the node most recently entered by `compile_node`.
    column: u32,
    /// Track enum type names so PropertyAccess on them can produce EnumVariant.
    enum_names: std::collections::HashSet<String>,
    /// Track struct type names to declared field order for indexed instances.
    struct_layouts: std::collections::HashMap<String, Vec<String>>,
    /// Track interface names → method names for runtime enforcement.
    interface_methods: std::collections::HashMap<String, Vec<String>>,
    /// Stack of active loop contexts for break/continue.
    loop_stack: Vec<LoopContext>,
    /// Current depth of exception handlers (for cleanup on break/continue).
    handler_depth: usize,
    /// Stack of pending finally bodies plus catch-handler barriers for
    /// unwind-aware lowering of `throw`, `return`, `break`, and `continue`.
    ///
    /// A `Finally` entry is a pending finally body that must execute when
    /// control exits its enclosing try block. A `CatchBarrier` marks the
    /// boundary of an active `try/catch` handler: throws emitted inside
    /// the try body are caught locally, so pre-running finallys *beyond*
    /// the barrier would wrongly fire side effects for outer blocks the
    /// throw never actually escapes. Throw lowering stops at the innermost
    /// barrier; `return`/`break`/`continue`, which do transfer past local
    /// handlers, still run every pending `Finally` up to their target.
    finally_bodies: Vec<FinallyEntry>,
    /// Counter for unique temp variable names.
    temp_counter: usize,
    /// Number of lexical block scopes currently active in this compiled frame.
    scope_depth: usize,
    /// Top-level `type` aliases, used to lower `schema_of(T)` and
    /// `output_schema: T` into constant JSON-Schema dicts at compile time.
    type_aliases: std::collections::HashMap<String, TypeExpr>,
    /// Lightweight compiler-side type facts used only for conservative
    /// bytecode specialization. This mirrors lexical scopes and is separate
    /// from the parser's diagnostic type checker so compile-only callers keep
    /// working without a required type-check pass.
    type_scopes: Vec<std::collections::HashMap<String, TypeExpr>>,
    /// Current-chunk string constant index. This avoids repeatedly scanning the
    /// constant pool while compiling name-heavy scripts.
    string_constants: std::collections::HashMap<String, u16>,
    /// Lexical variable slots for the current compiled frame. The compiler
    /// only consults this for names declared inside the current function-like
    /// body; all unresolved names stay on the existing dynamic/name path.
    local_scopes: Vec<std::collections::HashMap<String, LocalBinding>>,
    /// True when this compiler is emitting code outside any function-like
    /// scope (module top-level statements). `try*` is rejected here
    /// because the rethrow has no enclosing function to live in.
    /// Pipeline bodies and nested `Compiler::new()` instances (fn,
    /// closure, tool, etc.) flip this to false before compiling.
    module_level: bool,
}
159
160impl Compiler {
161    /// Compile a single AST node. Most arm bodies live in per-category
162    /// submodules (expressions, statements, closures, decls, patterns,
163    /// error_handling, concurrency); this function is a thin dispatcher.
164    fn compile_node(&mut self, snode: &SNode) -> Result<(), CompileError> {
165        self.line = snode.span.line as u32;
166        self.column = snode.span.column as u32;
167        self.chunk.set_column(self.column);
168        if self.options.optimizations_enabled() {
169            if let Some(folded) = optimizer::fold_constant_expr(snode) {
170                if folded.node != snode.node {
171                    return self.compile_node(&folded);
172                }
173            }
174        }
175        match &snode.node {
176            Node::IntLiteral(n) => {
177                let idx = self.chunk.add_constant(Constant::Int(*n));
178                self.chunk.emit_u16(Op::Constant, idx, self.line);
179            }
180            Node::FloatLiteral(n) => {
181                let idx = self.chunk.add_constant(Constant::Float(*n));
182                self.chunk.emit_u16(Op::Constant, idx, self.line);
183            }
184            Node::StringLiteral(s) | Node::RawStringLiteral(s) => {
185                let idx = self.string_constant(s);
186                self.chunk.emit_u16(Op::Constant, idx, self.line);
187            }
188            Node::BoolLiteral(true) => self.chunk.emit(Op::True, self.line),
189            Node::BoolLiteral(false) => self.chunk.emit(Op::False, self.line),
190            Node::NilLiteral => self.chunk.emit(Op::Nil, self.line),
191            Node::DurationLiteral(ms) => {
192                let ms = i64::try_from(*ms).map_err(|_| CompileError {
193                    message: "duration literal is too large".to_string(),
194                    line: self.line,
195                })?;
196                let idx = self.chunk.add_constant(Constant::Duration(ms));
197                self.chunk.emit_u16(Op::Constant, idx, self.line);
198            }
199            Node::Identifier(name) => {
200                self.emit_get_binding(name);
201            }
202            Node::LetBinding { pattern, value, .. } => {
203                let binding_type = match &snode.node {
204                    Node::LetBinding {
205                        type_ann: Some(type_ann),
206                        ..
207                    } => Some(type_ann.clone()),
208                    _ => self.infer_expr_type(value),
209                };
210                self.compile_node(value)?;
211                self.compile_destructuring(pattern, false)?;
212                self.record_binding_type(pattern, binding_type.clone());
213                self.maybe_register_owned_drop(pattern, binding_type.as_ref(), snode.span);
214            }
215            Node::VarBinding { pattern, value, .. } => {
216                let binding_type = match &snode.node {
217                    Node::VarBinding {
218                        type_ann: Some(type_ann),
219                        ..
220                    } => Some(type_ann.clone()),
221                    _ => self.infer_expr_type(value),
222                };
223                self.compile_node(value)?;
224                self.compile_destructuring(pattern, true)?;
225                self.record_binding_type(pattern, binding_type.clone());
226                self.maybe_register_owned_drop(pattern, binding_type.as_ref(), snode.span);
227            }
228            Node::ConstBinding {
229                name,
230                type_ann,
231                value,
232            } => {
233                // `const` lowers to the same bytecode as a let-binding
234                // over a simple identifier. The compile-time const-eval
235                // pass in the typechecker has already proven the
236                // initializer is pure and within budget, so re-running
237                // it through the VM is guaranteed to produce the same
238                // value byte-for-byte.
239                let binding_type = type_ann.clone().or_else(|| self.infer_expr_type(value));
240                self.compile_node(value)?;
241                let pattern = harn_parser::BindingPattern::Identifier(name.clone());
242                self.compile_destructuring(&pattern, false)?;
243                self.record_binding_type(&pattern, binding_type.clone());
244                self.maybe_register_owned_drop(&pattern, binding_type.as_ref(), snode.span);
245            }
246            Node::Assignment {
247                target, value, op, ..
248            } => {
249                self.compile_assignment(target, value, op)?;
250            }
251            Node::BinaryOp { op, left, right } => {
252                self.compile_binary_op(op, left, right)?;
253            }
254            Node::UnaryOp { op, operand } => {
255                self.compile_node(operand)?;
256                match op.as_str() {
257                    "-" => self.chunk.emit(Op::Negate, self.line),
258                    "!" => self.chunk.emit(Op::Not, self.line),
259                    _ => {}
260                }
261            }
262            Node::Ternary {
263                condition,
264                true_expr,
265                false_expr,
266            } => {
267                self.compile_node(condition)?;
268                let else_jump = self.chunk.emit_jump(Op::JumpIfFalse, self.line);
269                self.chunk.emit(Op::Pop, self.line);
270                self.compile_node(true_expr)?;
271                let end_jump = self.chunk.emit_jump(Op::Jump, self.line);
272                self.chunk.patch_jump(else_jump);
273                self.chunk.emit(Op::Pop, self.line);
274                self.compile_node(false_expr)?;
275                self.chunk.patch_jump(end_jump);
276            }
277            Node::FunctionCall { name, args, .. } => {
278                self.compile_function_call(name, args)?;
279            }
280            Node::MethodCall {
281                object,
282                method,
283                args,
284            } => {
285                self.compile_method_call(object, method, args)?;
286            }
287            Node::OptionalMethodCall {
288                object,
289                method,
290                args,
291            } => {
292                self.compile_node(object)?;
293                for arg in args {
294                    self.compile_node(arg)?;
295                }
296                let name_idx = self.string_constant(method);
297                self.chunk
298                    .emit_method_call_opt(name_idx, args.len() as u8, self.line);
299            }
300            Node::PropertyAccess { object, property } => {
301                self.compile_property_access(object, property)?;
302            }
303            Node::OptionalPropertyAccess { object, property } => {
304                self.compile_node(object)?;
305                let idx = self.string_constant(property);
306                self.chunk.emit_u16(Op::GetPropertyOpt, idx, self.line);
307            }
308            Node::SubscriptAccess { object, index } => {
309                self.compile_node(object)?;
310                self.compile_node(index)?;
311                self.chunk.emit(Op::Subscript, self.line);
312            }
313            Node::OptionalSubscriptAccess { object, index } => {
314                self.compile_node(object)?;
315                self.compile_node(index)?;
316                self.chunk.emit(Op::SubscriptOpt, self.line);
317            }
318            Node::SliceAccess { object, start, end } => {
319                self.compile_node(object)?;
320                if let Some(s) = start {
321                    self.compile_node(s)?;
322                } else {
323                    self.chunk.emit(Op::Nil, self.line);
324                }
325                if let Some(e) = end {
326                    self.compile_node(e)?;
327                } else {
328                    self.chunk.emit(Op::Nil, self.line);
329                }
330                self.chunk.emit(Op::Slice, self.line);
331            }
332            Node::IfElse {
333                condition,
334                then_body,
335                else_body,
336            } => {
337                self.compile_if_else(condition, then_body, else_body)?;
338            }
339            Node::WhileLoop { condition, body } => {
340                self.compile_while_loop(condition, body)?;
341            }
342            Node::ForIn {
343                pattern,
344                iterable,
345                body,
346            } => {
347                self.compile_for_in(pattern, iterable, body)?;
348            }
349            Node::ReturnStmt { value } => {
350                self.compile_return_stmt(value)?;
351            }
352            Node::BreakStmt => {
353                self.compile_break_stmt()?;
354            }
355            Node::ContinueStmt => {
356                self.compile_continue_stmt()?;
357            }
358            Node::ListLiteral(elements) => {
359                self.compile_list_literal(elements)?;
360            }
361            Node::DictLiteral(entries) => {
362                self.compile_dict_literal(entries)?;
363            }
364            Node::InterpolatedString(segments) => {
365                self.compile_interpolated_string(segments)?;
366            }
367            Node::FnDecl {
368                name,
369                type_params,
370                params,
371                body,
372                is_stream,
373                ..
374            } => {
375                self.compile_fn_decl(name, type_params, params, body, *is_stream)?;
376            }
377            Node::ToolDecl {
378                name,
379                description,
380                params,
381                return_type,
382                body,
383                ..
384            } => {
385                self.compile_tool_decl(name, description, params, return_type, body)?;
386            }
387            Node::SkillDecl { name, fields, .. } => {
388                self.compile_skill_decl(name, fields)?;
389            }
390            Node::EvalPackDecl {
391                binding_name,
392                pack_id,
393                fields,
394                body,
395                summarize,
396                ..
397            } => {
398                self.compile_eval_pack_decl(binding_name, pack_id, fields, body, summarize, true)?;
399            }
400            Node::Closure { params, body, .. } => {
401                self.compile_closure(params, body)?;
402            }
403            Node::ThrowStmt { value } => {
404                self.compile_throw_stmt(value)?;
405            }
406            Node::MatchExpr { value, arms } => {
407                self.compile_match_expr(value, arms)?;
408            }
409            Node::RangeExpr {
410                start,
411                end,
412                inclusive,
413            } => {
414                let name_idx = self.string_constant("__range__");
415                self.chunk.emit_u16(Op::Constant, name_idx, self.line);
416                self.compile_node(start)?;
417                self.compile_node(end)?;
418                if *inclusive {
419                    self.chunk.emit(Op::True, self.line);
420                } else {
421                    self.chunk.emit(Op::False, self.line);
422                }
423                self.chunk.emit_u8(Op::Call, 3, self.line);
424            }
425            Node::GuardStmt {
426                condition,
427                else_body,
428            } => {
429                self.compile_guard_stmt(condition, else_body)?;
430            }
431            Node::RequireStmt { condition, message } => {
432                self.compile_node(condition)?;
433                let ok_jump = self.chunk.emit_jump(Op::JumpIfTrue, self.line);
434                self.chunk.emit(Op::Pop, self.line);
435                if let Some(message) = message {
436                    self.compile_node(message)?;
437                } else {
438                    let idx = self.string_constant("require condition failed");
439                    self.chunk.emit_u16(Op::Constant, idx, self.line);
440                }
441                self.chunk.emit(Op::Throw, self.line);
442                self.chunk.patch_jump(ok_jump);
443                self.chunk.emit(Op::Pop, self.line);
444            }
445            Node::Block(stmts) => {
446                self.compile_scoped_block(stmts)?;
447            }
448            Node::DeadlineBlock { duration, body } => {
449                self.compile_node(duration)?;
450                self.chunk.emit(Op::DeadlineSetup, self.line);
451                self.compile_scoped_block(body)?;
452                self.chunk.emit(Op::DeadlineEnd, self.line);
453            }
454            Node::MutexBlock { key, body } => {
455                self.begin_scope();
456                let finally_floor = self.finally_bodies.len();
457                match key {
458                    // `mutex(resource) { ... }`: evaluate the resource and key
459                    // the lock on its structural value at runtime.
460                    Some(key_expr) => {
461                        self.compile_node(key_expr)?;
462                        self.chunk.emit(Op::SyncMutexEnterKeyed, self.line);
463                    }
464                    // `mutex { ... }`: key on the lexical call-site (computed in
465                    // the VM from the chunk + instruction pointer) so distinct
466                    // blocks don't contend on one global lock.
467                    None => {
468                        self.chunk.emit(Op::SyncMutexEnter, self.line);
469                    }
470                }
471                for sn in body {
472                    self.compile_discarded_stmt(sn)?;
473                }
474                self.drain_finallys_to_floor(finally_floor)?;
475                self.chunk.emit(Op::Nil, self.line);
476                self.end_scope();
477            }
478            Node::ScopeBlock { body } => {
479                // Structured-concurrency nursery. `TaskScopeEnter` pushes a task
480                // scope; tasks spawned inside register to it. `TaskScopeExit`
481                // joins them (propagating the first error, cancelling the rest).
482                // On `throw`/early exit the scope is unwound and its tasks
483                // cancelled by the frame/handler teardown, mirroring
484                // `held_sync_guards`.
485                self.begin_scope();
486                let finally_floor = self.finally_bodies.len();
487                self.chunk.emit(Op::TaskScopeEnter, self.line);
488                for sn in body {
489                    self.compile_discarded_stmt(sn)?;
490                }
491                self.drain_finallys_to_floor(finally_floor)?;
492                self.chunk.emit(Op::TaskScopeExit, self.line);
493                self.chunk.emit(Op::Nil, self.line);
494                self.end_scope();
495            }
496            Node::DeferStmt { body } => {
497                // Register the body to run on return/throw/scope-exit. The
498                // statement emits no bytecode of its own — the deferred body
499                // is inlined later by the finally-draining machinery — so it
500                // leaves the operand stack untouched, matching
501                // `produces_value` == false. Emitting a `Nil` here instead
502                // leaked an unpopped slot per execution, which in a loop body
503                // grew the operand stack without bound (surfaced by the
504                // #2622 balance assertion).
505                self.finally_bodies
506                    .push(FinallyEntry::Finally(body.clone()));
507            }
508            Node::YieldExpr { value } => {
509                if let Some(val) = value {
510                    self.compile_node(val)?;
511                } else {
512                    self.chunk.emit(Op::Nil, self.line);
513                }
514                self.chunk.emit(Op::Yield, self.line);
515            }
516            Node::EmitExpr { value } => {
517                self.compile_node(value)?;
518                self.chunk.emit(Op::Yield, self.line);
519            }
520            Node::EnumConstruct {
521                enum_name,
522                variant,
523                args,
524            } => {
525                self.compile_enum_construct(enum_name, variant, args)?;
526            }
527            Node::StructConstruct {
528                struct_name,
529                fields,
530            } => {
531                self.compile_struct_construct(struct_name, fields)?;
532            }
533            Node::ImportDecl { path, .. } => {
534                let idx = self.string_constant(path);
535                self.chunk.emit_u16(Op::Import, idx, self.line);
536            }
537            Node::SelectiveImport { names, path, .. } => {
538                let path_idx = self.string_constant(path);
539                let names_str = names.join(",");
540                let names_idx = self.owned_string_constant(names_str);
541                self.chunk
542                    .emit_u16(Op::SelectiveImport, path_idx, self.line);
543                let hi = (names_idx >> 8) as u8;
544                let lo = names_idx as u8;
545                self.chunk.code.push(hi);
546                self.chunk.code.push(lo);
547                self.chunk.lines.push(self.line);
548                self.chunk.columns.push(self.column);
549                self.chunk.lines.push(self.line);
550                self.chunk.columns.push(self.column);
551            }
552            Node::TryOperator { operand } => {
553                self.compile_node(operand)?;
554                self.chunk.emit(Op::TryUnwrap, self.line);
555            }
556            // `try* EXPR`: evaluate EXPR; on throw, run pending finally
557            // blocks up to the innermost catch barrier and rethrow the
558            // original value. On success, leave EXPR's value on the stack.
559            //
560            // Per the issue-#26 desugaring:
561            //   { let _r = try { EXPR }
562            //     guard is_ok(_r) else { throw unwrap_err(_r) }
563            //     unwrap(_r) }
564            //
565            // The bytecode realizes this directly: install a try handler
566            // around EXPR so a throw lands in our catch path, where we
567            // pre-run pending finallys and re-emit `Throw`. Skipping the
568            // intermediate Result.Ok/Err wrapping that `TryExpr` does
569            // keeps the success path a no-op (operand value passes through
570            // as-is).
571            Node::TryStar { operand } => {
572                self.compile_try_star(operand)?;
573            }
574            Node::ImplBlock { type_name, methods } => {
575                self.compile_impl_block(type_name, methods)?;
576            }
577            Node::StructDecl { name, fields, .. } => {
578                self.compile_struct_decl(name, fields)?;
579            }
580            // Metadata-only declarations: resolved entirely at compile time
581            // (enum names, type aliases, struct/interface layouts are
582            // pre-scanned), so they emit no bytecode and leave the operand
583            // stack untouched. `produces_value` classifies them as
584            // non-value-producing to match; contexts that require a block to
585            // yield a value (last statement of a block, match-arm body) emit
586            // their own `Nil` placeholder. Emitting one here instead left an
587            // unpopped `Nil` on the stack in every value-discarding context
588            // (`compile_top_level_declarations` pops nothing) — a latent
589            // imbalance surfaced by the #2622 balance assertion.
590            Node::Pipeline { .. }
591            | Node::OverrideDecl { .. }
592            | Node::TypeDecl { .. }
593            | Node::EnumDecl { .. }
594            | Node::InterfaceDecl { .. } => {}
595            Node::TryCatch {
596                has_catch: _,
597                body,
598                error_var,
599                error_type,
600                catch_body,
601                finally_body,
602            } => {
603                self.compile_try_catch(body, error_var, error_type, catch_body, finally_body)?;
604            }
605            Node::TryExpr { body } => {
606                self.compile_try_expr(body)?;
607            }
608            Node::Retry { count, body } => {
609                self.compile_retry(count, body)?;
610            }
611            Node::CostRoute { options, body } => {
612                self.compile_cost_route(options, body)?;
613            }
614            Node::Parallel {
615                mode,
616                expr,
617                variable,
618                body,
619                options,
620            } => {
621                self.compile_parallel(mode, expr, variable, body, options)?;
622            }
623            Node::SpawnExpr { body } => {
624                self.compile_spawn_expr(body)?;
625            }
626            Node::HitlExpr { kind, args } => {
627                self.compile_hitl_expr(*kind, args)?;
628            }
629            Node::SelectExpr {
630                cases,
631                timeout,
632                default_body,
633            } => {
634                self.compile_select_expr(cases, timeout, default_body)?;
635            }
636            Node::Spread(_) => {
637                return Err(CompileError {
638                    message: "spread (...) can only be used inside list literals, dict literals, or function call arguments".into(),
639                    line: self.line,
640                });
641            }
642            Node::AttributedDecl { attributes, inner } => {
643                self.compile_attributed_decl(attributes, inner)?;
644            }
645            Node::OrPattern(_) => {
646                return Err(CompileError {
647                    message: "or-pattern (|) can only appear as a match arm pattern".into(),
648                    line: self.line,
649                });
650            }
651        }
652        Ok(())
653    }
654}