// harn_vm/compiler/mod.rs

1use harn_parser::{Node, SNode, TypeExpr};
2
3mod closures;
4mod concurrency;
5mod decls;
6mod error;
7mod error_handling;
8mod expressions;
9mod hitl;
10mod optimizer;
11mod patterns;
12mod pipe;
13mod state;
14mod statements;
15#[cfg(test)]
16mod tests;
17mod type_facts;
18mod yield_scan;
19
20pub use error::CompileError;
21
22use crate::chunk::{Chunk, Constant, Op};
23
/// Environment variable that disables optional compiler optimizations.
///
/// The VM still emits structurally required bytecode, such as parameter
/// slots, but skips semantic-preserving optimizer passes. This gives tests
/// and benchmarks a stable optimized-vs-unoptimized comparison switch.
///
/// Only the *presence* of the variable is checked (see
/// [`CompilerOptions::from_env`]); its value is ignored.
pub const HARN_DISABLE_OPTIMIZATIONS_ENV: &str = "HARN_DISABLE_OPTIMIZATIONS";

/// Controls semantic-preserving compiler optimizations.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct CompilerOptions {
    /// `true` when optional optimizer passes should run.
    optimize: bool,
}

impl CompilerOptions {
    /// Options with optional optimizer passes enabled.
    pub fn optimized() -> Self {
        Self { optimize: true }
    }

    /// Options with optional optimizer passes disabled.
    pub fn without_optimizations() -> Self {
        Self { optimize: false }
    }

    /// Builds options from the process environment.
    ///
    /// Optimizations are disabled whenever
    /// [`HARN_DISABLE_OPTIMIZATIONS_ENV`] is set at all. The value is not
    /// inspected, so an empty string or `0` also disables them. When the
    /// variable is unset, optimizations are enabled.
    pub fn from_env() -> Self {
        if std::env::var_os(HARN_DISABLE_OPTIMIZATIONS_ENV).is_some() {
            Self::without_optimizations()
        } else {
            Self::optimized()
        }
    }

    /// Returns `true` when optional optimizer passes should run.
    pub fn optimizations_enabled(self) -> bool {
        self.optimize
    }
}

impl Default for CompilerOptions {
    /// Defaults to [`CompilerOptions::optimized`]; the environment is not
    /// consulted.
    fn default() -> Self {
        Self::optimized()
    }
}

65/// Look through an `AttributedDecl` wrapper to the inner declaration.
66/// `compile_named` / `compile` use this so attributed declarations like
67/// `@test pipeline foo(...)` are still discoverable by name.
68fn peel_node(sn: &SNode) -> &Node {
69    match &sn.node {
70        Node::AttributedDecl { inner, .. } => &inner.node,
71        other => other,
72    }
73}
74
75/// Entry in the compiler's pending-finally stack. See the field-level doc on
76/// `Compiler::finally_bodies` for the unwind semantics each variant encodes.
77#[derive(Clone, Debug)]
78enum FinallyEntry {
79    Finally(Vec<SNode>),
80    CatchBarrier,
81}
82
/// Tracks loop context for break/continue compilation.
///
/// Each depth field is a snapshot taken at loop entry.
struct LoopContext {
    /// Offset of the loop start (for continue).
    start_offset: usize,
    /// Positions of break jumps that need patching to the loop end.
    break_patches: Vec<usize>,
    /// True if this is a for-in loop (has an iterator to clean up on break).
    has_iterator: bool,
    /// Number of exception handlers active at loop entry.
    handler_depth: usize,
    /// Number of pending finally bodies at loop entry.
    finally_depth: usize,
    /// Lexical scope depth at loop entry.
    scope_depth: usize,
}

/// A lexically resolved local variable, as stored in
/// `Compiler::local_scopes`.
#[derive(Clone, Copy, Debug)]
struct LocalBinding {
    /// Slot index of the variable within the current compiled frame.
    slot: u16,
    /// Whether the binding is mutable.
    /// NOTE(review): presumably set for `var` and clear for `let`; confirm
    /// against the code that creates bindings.
    mutable: bool,
}

105/// Compiles an AST into bytecode.
106pub struct Compiler {
107    options: CompilerOptions,
108    chunk: Chunk,
109    line: u32,
110    column: u32,
111    /// Track enum type names so PropertyAccess on them can produce EnumVariant.
112    enum_names: std::collections::HashSet<String>,
113    /// Track struct type names to declared field order for indexed instances.
114    struct_layouts: std::collections::HashMap<String, Vec<String>>,
115    /// Track interface names → method names for runtime enforcement.
116    interface_methods: std::collections::HashMap<String, Vec<String>>,
117    /// Stack of active loop contexts for break/continue.
118    loop_stack: Vec<LoopContext>,
119    /// Current depth of exception handlers (for cleanup on break/continue).
120    handler_depth: usize,
121    /// Stack of pending finally bodies plus catch-handler barriers for
122    /// unwind-aware lowering of `throw`, `return`, `break`, and `continue`.
123    ///
124    /// A `Finally` entry is a pending finally body that must execute when
125    /// control exits its enclosing try block. A `CatchBarrier` marks the
126    /// boundary of an active `try/catch` handler: throws emitted inside
127    /// the try body are caught locally, so pre-running finallys *beyond*
128    /// the barrier would wrongly fire side effects for outer blocks the
129    /// throw never actually escapes. Throw lowering stops at the innermost
130    /// barrier; `return`/`break`/`continue`, which do transfer past local
131    /// handlers, still run every pending `Finally` up to their target.
132    finally_bodies: Vec<FinallyEntry>,
133    /// Counter for unique temp variable names.
134    temp_counter: usize,
135    /// Number of lexical block scopes currently active in this compiled frame.
136    scope_depth: usize,
137    /// Top-level `type` aliases, used to lower `schema_of(T)` and
138    /// `output_schema: T` into constant JSON-Schema dicts at compile time.
139    type_aliases: std::collections::HashMap<String, TypeExpr>,
140    /// Lightweight compiler-side type facts used only for conservative
141    /// bytecode specialization. This mirrors lexical scopes and is separate
142    /// from the parser's diagnostic type checker so compile-only callers keep
143    /// working without a required type-check pass.
144    type_scopes: Vec<std::collections::HashMap<String, TypeExpr>>,
145    /// Lexical variable slots for the current compiled frame. The compiler
146    /// only consults this for names declared inside the current function-like
147    /// body; all unresolved names stay on the existing dynamic/name path.
148    local_scopes: Vec<std::collections::HashMap<String, LocalBinding>>,
149    /// True when this compiler is emitting code outside any function-like
150    /// scope (module top-level statements). `try*` is rejected here
151    /// because the rethrow has no enclosing function to live in.
152    /// Pipeline bodies and nested `Compiler::new()` instances (fn,
153    /// closure, tool, etc.) flip this to false before compiling.
154    module_level: bool,
155}
156
157impl Compiler {
158    /// Compile a single AST node. Most arm bodies live in per-category
159    /// submodules (expressions, statements, closures, decls, patterns,
160    /// error_handling, concurrency); this function is a thin dispatcher.
161    fn compile_node(&mut self, snode: &SNode) -> Result<(), CompileError> {
162        self.line = snode.span.line as u32;
163        self.column = snode.span.column as u32;
164        self.chunk.set_column(self.column);
165        if self.options.optimizations_enabled() {
166            if let Some(folded) = optimizer::fold_constant_expr(snode) {
167                if folded.node != snode.node {
168                    return self.compile_node(&folded);
169                }
170            }
171        }
172        match &snode.node {
173            Node::IntLiteral(n) => {
174                let idx = self.chunk.add_constant(Constant::Int(*n));
175                self.chunk.emit_u16(Op::Constant, idx, self.line);
176            }
177            Node::FloatLiteral(n) => {
178                let idx = self.chunk.add_constant(Constant::Float(*n));
179                self.chunk.emit_u16(Op::Constant, idx, self.line);
180            }
181            Node::StringLiteral(s) | Node::RawStringLiteral(s) => {
182                let idx = self.chunk.add_constant(Constant::String(s.clone()));
183                self.chunk.emit_u16(Op::Constant, idx, self.line);
184            }
185            Node::BoolLiteral(true) => self.chunk.emit(Op::True, self.line),
186            Node::BoolLiteral(false) => self.chunk.emit(Op::False, self.line),
187            Node::NilLiteral => self.chunk.emit(Op::Nil, self.line),
188            Node::DurationLiteral(ms) => {
189                let ms = i64::try_from(*ms).map_err(|_| CompileError {
190                    message: "duration literal is too large".to_string(),
191                    line: self.line,
192                })?;
193                let idx = self.chunk.add_constant(Constant::Duration(ms));
194                self.chunk.emit_u16(Op::Constant, idx, self.line);
195            }
196            Node::Identifier(name) => {
197                self.emit_get_binding(name);
198            }
199            Node::LetBinding { pattern, value, .. } => {
200                let binding_type = match &snode.node {
201                    Node::LetBinding {
202                        type_ann: Some(type_ann),
203                        ..
204                    } => Some(type_ann.clone()),
205                    _ => self.infer_expr_type(value),
206                };
207                self.compile_node(value)?;
208                self.compile_destructuring(pattern, false)?;
209                self.record_binding_type(pattern, binding_type.clone());
210                self.maybe_register_owned_drop(pattern, binding_type.as_ref(), snode.span);
211            }
212            Node::VarBinding { pattern, value, .. } => {
213                let binding_type = match &snode.node {
214                    Node::VarBinding {
215                        type_ann: Some(type_ann),
216                        ..
217                    } => Some(type_ann.clone()),
218                    _ => self.infer_expr_type(value),
219                };
220                self.compile_node(value)?;
221                self.compile_destructuring(pattern, true)?;
222                self.record_binding_type(pattern, binding_type.clone());
223                self.maybe_register_owned_drop(pattern, binding_type.as_ref(), snode.span);
224            }
225            Node::Assignment {
226                target, value, op, ..
227            } => {
228                self.compile_assignment(target, value, op)?;
229            }
230            Node::BinaryOp { op, left, right } => {
231                self.compile_binary_op(op, left, right)?;
232            }
233            Node::UnaryOp { op, operand } => {
234                self.compile_node(operand)?;
235                match op.as_str() {
236                    "-" => self.chunk.emit(Op::Negate, self.line),
237                    "!" => self.chunk.emit(Op::Not, self.line),
238                    _ => {}
239                }
240            }
241            Node::Ternary {
242                condition,
243                true_expr,
244                false_expr,
245            } => {
246                self.compile_node(condition)?;
247                let else_jump = self.chunk.emit_jump(Op::JumpIfFalse, self.line);
248                self.chunk.emit(Op::Pop, self.line);
249                self.compile_node(true_expr)?;
250                let end_jump = self.chunk.emit_jump(Op::Jump, self.line);
251                self.chunk.patch_jump(else_jump);
252                self.chunk.emit(Op::Pop, self.line);
253                self.compile_node(false_expr)?;
254                self.chunk.patch_jump(end_jump);
255            }
256            Node::FunctionCall { name, args, .. } => {
257                self.compile_function_call(name, args)?;
258            }
259            Node::MethodCall {
260                object,
261                method,
262                args,
263            } => {
264                self.compile_method_call(object, method, args)?;
265            }
266            Node::OptionalMethodCall {
267                object,
268                method,
269                args,
270            } => {
271                self.compile_node(object)?;
272                for arg in args {
273                    self.compile_node(arg)?;
274                }
275                let name_idx = self.chunk.add_constant(Constant::String(method.clone()));
276                self.chunk
277                    .emit_method_call_opt(name_idx, args.len() as u8, self.line);
278            }
279            Node::PropertyAccess { object, property } => {
280                self.compile_property_access(object, property)?;
281            }
282            Node::OptionalPropertyAccess { object, property } => {
283                self.compile_node(object)?;
284                let idx = self.chunk.add_constant(Constant::String(property.clone()));
285                self.chunk.emit_u16(Op::GetPropertyOpt, idx, self.line);
286            }
287            Node::SubscriptAccess { object, index } => {
288                self.compile_node(object)?;
289                self.compile_node(index)?;
290                self.chunk.emit(Op::Subscript, self.line);
291            }
292            Node::OptionalSubscriptAccess { object, index } => {
293                self.compile_node(object)?;
294                self.compile_node(index)?;
295                self.chunk.emit(Op::SubscriptOpt, self.line);
296            }
297            Node::SliceAccess { object, start, end } => {
298                self.compile_node(object)?;
299                if let Some(s) = start {
300                    self.compile_node(s)?;
301                } else {
302                    self.chunk.emit(Op::Nil, self.line);
303                }
304                if let Some(e) = end {
305                    self.compile_node(e)?;
306                } else {
307                    self.chunk.emit(Op::Nil, self.line);
308                }
309                self.chunk.emit(Op::Slice, self.line);
310            }
311            Node::IfElse {
312                condition,
313                then_body,
314                else_body,
315            } => {
316                self.compile_if_else(condition, then_body, else_body)?;
317            }
318            Node::WhileLoop { condition, body } => {
319                self.compile_while_loop(condition, body)?;
320            }
321            Node::ForIn {
322                pattern,
323                iterable,
324                body,
325            } => {
326                self.compile_for_in(pattern, iterable, body)?;
327            }
328            Node::ReturnStmt { value } => {
329                self.compile_return_stmt(value)?;
330            }
331            Node::BreakStmt => {
332                self.compile_break_stmt()?;
333            }
334            Node::ContinueStmt => {
335                self.compile_continue_stmt()?;
336            }
337            Node::ListLiteral(elements) => {
338                self.compile_list_literal(elements)?;
339            }
340            Node::DictLiteral(entries) => {
341                self.compile_dict_literal(entries)?;
342            }
343            Node::InterpolatedString(segments) => {
344                self.compile_interpolated_string(segments)?;
345            }
346            Node::FnDecl {
347                name,
348                type_params,
349                params,
350                body,
351                is_stream,
352                ..
353            } => {
354                self.compile_fn_decl(name, type_params, params, body, *is_stream)?;
355            }
356            Node::ToolDecl {
357                name,
358                description,
359                params,
360                return_type,
361                body,
362                ..
363            } => {
364                self.compile_tool_decl(name, description, params, return_type, body)?;
365            }
366            Node::SkillDecl { name, fields, .. } => {
367                self.compile_skill_decl(name, fields)?;
368            }
369            Node::EvalPackDecl {
370                binding_name,
371                pack_id,
372                fields,
373                body,
374                summarize,
375                ..
376            } => {
377                self.compile_eval_pack_decl(binding_name, pack_id, fields, body, summarize, true)?;
378            }
379            Node::Closure { params, body, .. } => {
380                self.compile_closure(params, body)?;
381            }
382            Node::ThrowStmt { value } => {
383                self.compile_throw_stmt(value)?;
384            }
385            Node::MatchExpr { value, arms } => {
386                self.compile_match_expr(value, arms)?;
387            }
388            Node::RangeExpr {
389                start,
390                end,
391                inclusive,
392            } => {
393                let name_idx = self
394                    .chunk
395                    .add_constant(Constant::String("__range__".to_string()));
396                self.chunk.emit_u16(Op::Constant, name_idx, self.line);
397                self.compile_node(start)?;
398                self.compile_node(end)?;
399                if *inclusive {
400                    self.chunk.emit(Op::True, self.line);
401                } else {
402                    self.chunk.emit(Op::False, self.line);
403                }
404                self.chunk.emit_u8(Op::Call, 3, self.line);
405            }
406            Node::GuardStmt {
407                condition,
408                else_body,
409            } => {
410                self.compile_guard_stmt(condition, else_body)?;
411            }
412            Node::RequireStmt { condition, message } => {
413                self.compile_node(condition)?;
414                let ok_jump = self.chunk.emit_jump(Op::JumpIfTrue, self.line);
415                self.chunk.emit(Op::Pop, self.line);
416                if let Some(message) = message {
417                    self.compile_node(message)?;
418                } else {
419                    let idx = self
420                        .chunk
421                        .add_constant(Constant::String("require condition failed".to_string()));
422                    self.chunk.emit_u16(Op::Constant, idx, self.line);
423                }
424                self.chunk.emit(Op::Throw, self.line);
425                self.chunk.patch_jump(ok_jump);
426                self.chunk.emit(Op::Pop, self.line);
427            }
428            Node::Block(stmts) => {
429                self.compile_scoped_block(stmts)?;
430            }
431            Node::DeadlineBlock { duration, body } => {
432                self.compile_node(duration)?;
433                self.chunk.emit(Op::DeadlineSetup, self.line);
434                self.compile_scoped_block(body)?;
435                self.chunk.emit(Op::DeadlineEnd, self.line);
436            }
437            Node::MutexBlock { body } => {
438                self.begin_scope();
439                let finally_floor = self.finally_bodies.len();
440                let key_idx = self
441                    .chunk
442                    .add_constant(Constant::String("__default__".to_string()));
443                self.chunk.emit_u16(Op::SyncMutexEnter, key_idx, self.line);
444                for sn in body {
445                    self.compile_node(sn)?;
446                    if Self::produces_value(&sn.node) {
447                        self.chunk.emit(Op::Pop, self.line);
448                    }
449                }
450                self.drain_finallys_to_floor(finally_floor)?;
451                self.chunk.emit(Op::Nil, self.line);
452                self.end_scope();
453            }
454            Node::DeferStmt { body } => {
455                // Push onto the finally stack so it runs on return/throw/scope-exit.
456                self.finally_bodies
457                    .push(FinallyEntry::Finally(body.clone()));
458                self.chunk.emit(Op::Nil, self.line);
459            }
460            Node::YieldExpr { value } => {
461                if let Some(val) = value {
462                    self.compile_node(val)?;
463                } else {
464                    self.chunk.emit(Op::Nil, self.line);
465                }
466                self.chunk.emit(Op::Yield, self.line);
467            }
468            Node::EmitExpr { value } => {
469                self.compile_node(value)?;
470                self.chunk.emit(Op::Yield, self.line);
471            }
472            Node::EnumConstruct {
473                enum_name,
474                variant,
475                args,
476            } => {
477                self.compile_enum_construct(enum_name, variant, args)?;
478            }
479            Node::StructConstruct {
480                struct_name,
481                fields,
482            } => {
483                self.compile_struct_construct(struct_name, fields)?;
484            }
485            Node::ImportDecl { path, .. } => {
486                let idx = self.chunk.add_constant(Constant::String(path.clone()));
487                self.chunk.emit_u16(Op::Import, idx, self.line);
488            }
489            Node::SelectiveImport { names, path, .. } => {
490                let path_idx = self.chunk.add_constant(Constant::String(path.clone()));
491                let names_str = names.join(",");
492                let names_idx = self.chunk.add_constant(Constant::String(names_str));
493                self.chunk
494                    .emit_u16(Op::SelectiveImport, path_idx, self.line);
495                let hi = (names_idx >> 8) as u8;
496                let lo = names_idx as u8;
497                self.chunk.code.push(hi);
498                self.chunk.code.push(lo);
499                self.chunk.lines.push(self.line);
500                self.chunk.columns.push(self.column);
501                self.chunk.lines.push(self.line);
502                self.chunk.columns.push(self.column);
503            }
504            Node::TryOperator { operand } => {
505                self.compile_node(operand)?;
506                self.chunk.emit(Op::TryUnwrap, self.line);
507            }
508            // `try* EXPR`: evaluate EXPR; on throw, run pending finally
509            // blocks up to the innermost catch barrier and rethrow the
510            // original value. On success, leave EXPR's value on the stack.
511            //
512            // Per the issue-#26 desugaring:
513            //   { let _r = try { EXPR }
514            //     guard is_ok(_r) else { throw unwrap_err(_r) }
515            //     unwrap(_r) }
516            //
517            // The bytecode realizes this directly: install a try handler
518            // around EXPR so a throw lands in our catch path, where we
519            // pre-run pending finallys and re-emit `Throw`. Skipping the
520            // intermediate Result.Ok/Err wrapping that `TryExpr` does
521            // keeps the success path a no-op (operand value passes through
522            // as-is).
523            Node::TryStar { operand } => {
524                self.compile_try_star(operand)?;
525            }
526            Node::ImplBlock { type_name, methods } => {
527                self.compile_impl_block(type_name, methods)?;
528            }
529            Node::StructDecl { name, fields, .. } => {
530                self.compile_struct_decl(name, fields)?;
531            }
532            // Metadata-only declarations (no runtime effect).
533            Node::Pipeline { .. }
534            | Node::OverrideDecl { .. }
535            | Node::TypeDecl { .. }
536            | Node::EnumDecl { .. }
537            | Node::InterfaceDecl { .. } => {
538                self.chunk.emit(Op::Nil, self.line);
539            }
540            Node::TryCatch {
541                has_catch: _,
542                body,
543                error_var,
544                error_type,
545                catch_body,
546                finally_body,
547            } => {
548                self.compile_try_catch(body, error_var, error_type, catch_body, finally_body)?;
549            }
550            Node::TryExpr { body } => {
551                self.compile_try_expr(body)?;
552            }
553            Node::Retry { count, body } => {
554                self.compile_retry(count, body)?;
555            }
556            Node::CostRoute { options, body } => {
557                self.compile_cost_route(options, body)?;
558            }
559            Node::Parallel {
560                mode,
561                expr,
562                variable,
563                body,
564                options,
565            } => {
566                self.compile_parallel(mode, expr, variable, body, options)?;
567            }
568            Node::SpawnExpr { body } => {
569                self.compile_spawn_expr(body)?;
570            }
571            Node::HitlExpr { kind, args } => {
572                self.compile_hitl_expr(*kind, args)?;
573            }
574            Node::SelectExpr {
575                cases,
576                timeout,
577                default_body,
578            } => {
579                self.compile_select_expr(cases, timeout, default_body)?;
580            }
581            Node::Spread(_) => {
582                return Err(CompileError {
583                    message: "spread (...) can only be used inside list literals, dict literals, or function call arguments".into(),
584                    line: self.line,
585                });
586            }
587            Node::AttributedDecl { attributes, inner } => {
588                self.compile_attributed_decl(attributes, inner)?;
589            }
590            Node::OrPattern(_) => {
591                return Err(CompileError {
592                    message: "or-pattern (|) can only appear as a match arm pattern".into(),
593                    line: self.line,
594                });
595            }
596        }
597        Ok(())
598    }
599}