Skip to main content

harn_vm/
step_runtime.rs

1//! Per-step runtime state for `@step`-annotated persona functions.
2//!
3//! The compiler emits a call to the `__register_step` builtin after each
4//! `@step` declaration so the runtime can dispatch on the step's metadata
5//! when its function is invoked. While a step's frame is on the call
6//! stack, an [`ActiveStep`] entry tracks per-step LLM usage, defaults
7//! `llm_call`'s model when the call site doesn't override it, and bounds
8//! cumulative token and cost spend against the step's budget.
9//!
//! This module owns six thread-locals (the step and persona registries,
//! the stacks of currently-active steps and personas, a log of completed
//! step summaries, and the registered persona/step hooks) but exposes
//! only narrow helpers — `active_step_*` / `with_active_step` /
//! `record_step_llm_usage` / etc. — so the call sites in
//! `crates/harn-vm/src/llm/`, `crates/harn-vm/src/vm/`, and the compiler
//! stay focused.
16
17use std::cell::RefCell;
18use std::collections::BTreeMap;
19use std::rc::Rc;
20
21use serde::Serialize;
22use serde_json::Value as JsonValue;
23
24use crate::orchestration::HookEvent;
25use crate::value::{VmClosure, VmError, VmValue};
26
27fn vm_str(value: &VmValue) -> Option<&str> {
28    match value {
29        VmValue::String(s) => Some(s.as_ref()),
30        _ => None,
31    }
32}
33
/// Static metadata captured from a `@step(...)` attribute.
///
/// Populated by the `__register_step` builtin (see [`register_step_from_dict`])
/// when the program first runs, then consulted by `llm_call` and the
/// frame-pop hooks while the step is active.
#[derive(Debug, Default, Clone)]
pub struct StepDefinition {
    /// Step name taken from the metadata dict's `name` key; falls back to
    /// the function name when that key is absent or not a string.
    pub name: String,
    /// Name of the `@step`-annotated function; also the registry key.
    pub function: String,
    /// Default model for the step. An empty string in the metadata is
    /// stored as `None`.
    pub model: Option<String>,
    /// Ceiling on cumulative input + output tokens; `None` means no
    /// token limit is enforced.
    pub max_tokens: Option<u64>,
    /// Ceiling on cumulative cost in USD; `None` means no cost limit is
    /// enforced.
    pub max_usd: Option<f64>,
    /// One of "fail" (default), "continue", "escalate". Drives how a
    /// `budget_exceeded` error propagating out of the step is handled —
    /// see `crates/harn-vm/src/vm/execution.rs`.
    pub error_boundary: Option<String>,
}
51
/// Static metadata for a persona function, registered through
/// [`register_persona`] / [`register_persona_from_dict`].
#[derive(Debug, Default, Clone)]
pub struct PersonaDefinition {
    /// Persona name from the metadata dict's `name` key; falls back to
    /// the function name when that key is absent or not a string.
    pub name: String,
}
56
57impl StepDefinition {
58    pub fn boundary(&self) -> StepErrorBoundary {
59        match self.error_boundary.as_deref() {
60            Some("continue") => StepErrorBoundary::Continue,
61            Some("escalate") => StepErrorBoundary::Escalate,
62            _ => StepErrorBoundary::Fail,
63        }
64    }
65}
66
/// Typed form of [`StepDefinition::error_boundary`], produced by
/// [`StepDefinition::boundary`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StepErrorBoundary {
    /// Default; chosen for a missing or unrecognised boundary string.
    Fail,
    /// Selected by the boundary string `"continue"`.
    Continue,
    /// Selected by the boundary string `"escalate"`.
    Escalate,
}
73
/// Tracks one in-flight step. The `frame_depth` is `Vm::frames.len()`
/// captured immediately after `push_closure_frame` returns, so an
/// `ActiveStep` is "alive" while `Vm::frames.len() >= frame_depth`.
#[derive(Debug, Clone)]
pub struct ActiveStep {
    /// Frame depth recorded when the step was pushed; compared against
    /// the current depth to decide when the entry is popped.
    pub frame_depth: usize,
    /// Shared handle to the registered metadata for this step.
    pub definition: Rc<StepDefinition>,
    /// Name of the persona on top of the persona stack when the step was
    /// pushed, if any.
    pub persona: Option<String>,
    /// Copy of the arguments supplied to [`maybe_push_active_step`].
    pub args: Vec<VmValue>,
    /// Running total of input tokens attributed to this step.
    pub input_tokens: u64,
    /// Running total of output tokens attributed to this step.
    pub output_tokens: u64,
    /// Running total of cost (USD) attributed to this step.
    pub cost_usd: f64,
    /// Number of usage records attributed to this step.
    pub llm_calls: u32,
    /// Most recent non-empty model name passed to
    /// [`record_step_llm_usage`]; `None` until the first such record.
    pub last_model: Option<String>,
    /// Tracing span id opened when the step's frame was pushed; ended on
    /// completion. 0 when tracing was disabled at push time, in which
    /// case `span_end` is a no-op anyway.
    pub span_id: u64,
}
93
94impl ActiveStep {
95    fn new(
96        frame_depth: usize,
97        definition: Rc<StepDefinition>,
98        persona: Option<String>,
99        args: Vec<VmValue>,
100        span_id: u64,
101    ) -> Self {
102        Self {
103            frame_depth,
104            definition,
105            persona,
106            args,
107            input_tokens: 0,
108            output_tokens: 0,
109            cost_usd: 0.0,
110            llm_calls: 0,
111            last_model: None,
112            span_id,
113        }
114    }
115
116    fn total_tokens(&self) -> u64 {
117        self.input_tokens.saturating_add(self.output_tokens)
118    }
119}
120
/// One entry of the active-persona stack, pushed by
/// [`maybe_push_active_persona`] and popped by [`prune_below_frame`].
#[derive(Debug, Clone)]
pub struct ActivePersona {
    /// Frame depth supplied when the persona was pushed; the entry is
    /// popped once the current depth drops below it.
    pub frame_depth: usize,
    /// Shared handle to the registered persona metadata.
    pub definition: Rc<PersonaDefinition>,
}
126
/// Snapshot persisted into [`COMPLETED_STEPS`] when the step's frame
/// unwinds. Receipts and `harn persona inspect`-style downstream consumers
/// read it back via [`drain_completed_steps`].
#[derive(Debug, Clone, Serialize)]
pub struct CompletedStep {
    /// Step name copied from the step's definition.
    pub name: String,
    /// Function name copied from the step's definition.
    pub function: String,
    /// Last model recorded for the step, falling back to the model
    /// declared in the definition.
    pub model: Option<String>,
    /// Total input tokens attributed to the step.
    pub input_tokens: u64,
    /// Total output tokens attributed to the step.
    pub output_tokens: u64,
    /// Total cost (USD) attributed to the step.
    pub cost_usd: f64,
    /// Number of usage records attributed to the step.
    pub llm_calls: u32,
    /// Completion status string supplied by whoever finished the step
    /// (`"completed"` when the entry is pruned by frame depth).
    pub status: String,
    /// Optional error text supplied alongside the status.
    pub error: Option<String>,
}
142
thread_local! {
    // `@step` metadata keyed by function name.
    static STEP_REGISTRY: RefCell<BTreeMap<String, Rc<StepDefinition>>> =
        const { RefCell::new(BTreeMap::new()) };
    // Persona metadata keyed by function name.
    static PERSONA_REGISTRY: RefCell<BTreeMap<String, Rc<PersonaDefinition>>> =
        const { RefCell::new(BTreeMap::new()) };
    // Personas whose frames are currently on the call stack (top = last).
    static PERSONA_STACK: RefCell<Vec<ActivePersona>> = const { RefCell::new(Vec::new()) };
    // Steps whose frames are currently on the call stack (top = last).
    static STEP_STACK: RefCell<Vec<ActiveStep>> = const { RefCell::new(Vec::new()) };
    // Summaries of finished steps, in completion order, until drained.
    static COMPLETED_STEPS: RefCell<Vec<CompletedStep>> = const { RefCell::new(Vec::new()) };
    // Persona-level and step-level hook registrations, in registration order.
    static PERSONA_HOOKS: RefCell<Vec<PersonaHookRegistration>> = const { RefCell::new(Vec::new()) };
}
153
154/// Reset every thread-local owned by this module. Called between test
155/// runs and at the start of each top-level program execution so leftover
156/// registrations don't leak across runs.
157pub fn reset_thread_local_state() {
158    STEP_REGISTRY.with(|r| r.borrow_mut().clear());
159    PERSONA_REGISTRY.with(|r| r.borrow_mut().clear());
160    PERSONA_STACK.with(|s| s.borrow_mut().clear());
161    STEP_STACK.with(|s| s.borrow_mut().clear());
162    COMPLETED_STEPS.with(|c| c.borrow_mut().clear());
163    PERSONA_HOOKS.with(|h| h.borrow_mut().clear());
164}
165
166/// Bind a `@step` function name to its declared metadata. Idempotent: a
167/// second call replaces the prior definition (matches re-evaluation
168/// semantics of `harn run` and the conformance harness).
169pub fn register_step(function: &str, definition: StepDefinition) {
170    STEP_REGISTRY.with(|registry| {
171        registry
172            .borrow_mut()
173            .insert(function.to_string(), Rc::new(definition));
174    });
175}
176
177pub fn register_persona(function: &str, definition: PersonaDefinition) {
178    PERSONA_REGISTRY.with(|registry| {
179        registry
180            .borrow_mut()
181            .insert(function.to_string(), Rc::new(definition));
182    });
183}
184
185pub fn register_persona_from_dict(args: Vec<VmValue>) -> Result<VmValue, VmError> {
186    let function = args
187        .first()
188        .and_then(vm_str)
189        .map(|s| s.to_string())
190        .ok_or_else(|| {
191            VmError::Thrown(VmValue::String(Rc::from(
192                "__register_persona: expected (function_name, metadata_dict)",
193            )))
194        })?;
195    let meta = args
196        .get(1)
197        .and_then(VmValue::as_dict)
198        .cloned()
199        .ok_or_else(|| {
200            VmError::Thrown(VmValue::String(Rc::from(
201                "__register_persona: metadata argument must be a dict",
202            )))
203        })?;
204    let definition = PersonaDefinition {
205        name: meta
206            .get("name")
207            .and_then(vm_str)
208            .map(str::to_string)
209            .unwrap_or_else(|| function.clone()),
210    };
211    register_persona(&function, definition);
212    Ok(VmValue::Nil)
213}
214
215/// Builtin entry point invoked by compiler-emitted bytecode after every
216/// `@step` function declaration. Accepts a dict mirroring
217/// `harn_modules::PersonaStepMetadata`.
218pub fn register_step_from_dict(args: Vec<VmValue>) -> Result<VmValue, VmError> {
219    let function = args
220        .first()
221        .and_then(vm_str)
222        .map(|s| s.to_string())
223        .ok_or_else(|| {
224            VmError::Thrown(VmValue::String(Rc::from(
225                "__register_step: expected (function_name, metadata_dict)",
226            )))
227        })?;
228    let meta = args
229        .get(1)
230        .and_then(VmValue::as_dict)
231        .cloned()
232        .ok_or_else(|| {
233            VmError::Thrown(VmValue::String(Rc::from(
234                "__register_step: metadata argument must be a dict",
235            )))
236        })?;
237
238    let mut definition = StepDefinition {
239        function: function.clone(),
240        ..StepDefinition::default()
241    };
242    definition.name = meta
243        .get("name")
244        .and_then(vm_str)
245        .map(|s| s.to_string())
246        .unwrap_or_else(|| function.clone());
247    definition.model = meta
248        .get("model")
249        .and_then(vm_str)
250        .map(|s| s.to_string())
251        .filter(|s| !s.is_empty());
252    definition.error_boundary = meta
253        .get("error_boundary")
254        .and_then(vm_str)
255        .map(|s| s.to_string());
256
257    if let Some(VmValue::Dict(budget)) = meta.get("budget") {
258        if let Some(value) = budget.get("max_tokens") {
259            definition.max_tokens = match value {
260                VmValue::Int(n) if *n > 0 => Some(*n as u64),
261                VmValue::Float(f) if f.is_finite() && *f > 0.0 => Some(*f as u64),
262                _ => None,
263            };
264        }
265        if let Some(value) = budget.get("max_usd") {
266            definition.max_usd = match value {
267                VmValue::Float(f) if f.is_finite() && *f >= 0.0 => Some(*f),
268                VmValue::Int(n) if *n >= 0 => Some(*n as f64),
269                _ => None,
270            };
271        }
272    }
273
274    register_step(&function, definition);
275    Ok(VmValue::Nil)
276}
277
/// One registered hook, stored in `PERSONA_HOOKS` and filtered by
/// [`matching_hooks`].
#[derive(Clone)]
pub struct PersonaHookRegistration {
    /// Pattern matched against the persona name via
    /// `crate::orchestration::glob_match`.
    pub persona_pattern: String,
    /// When `Some`, the hook only matches that exact step name; `None`
    /// matches regardless of step.
    pub step_name: Option<String>,
    /// Event the hook is registered for.
    pub event: HookEvent,
    /// When `Some`, the hook only matches if a budget percentage is
    /// supplied and is at least this value.
    pub threshold_pct: Option<f64>,
    /// Closure returned to the caller for matching hooks.
    pub handler: Rc<VmClosure>,
}
286
287impl std::fmt::Debug for PersonaHookRegistration {
288    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
289        f.debug_struct("PersonaHookRegistration")
290            .field("persona_pattern", &self.persona_pattern)
291            .field("step_name", &self.step_name)
292            .field("event", &self.event)
293            .field("threshold_pct", &self.threshold_pct)
294            .field("handler", &"..")
295            .finish()
296    }
297}
298
/// A hook selected by [`matching_hooks`]: the handler to call and the
/// event it was registered for.
#[derive(Debug, Clone)]
pub struct PersonaHookInvocation {
    /// Closure taken from the matching registration.
    pub handler: Rc<VmClosure>,
    /// Event copied from the matching registration.
    pub event: HookEvent,
}
304
305pub fn register_persona_hook(
306    persona_pattern: impl Into<String>,
307    event: HookEvent,
308    threshold_pct: Option<f64>,
309    handler: Rc<VmClosure>,
310) {
311    PERSONA_HOOKS.with(|hooks| {
312        hooks.borrow_mut().push(PersonaHookRegistration {
313            persona_pattern: persona_pattern.into(),
314            step_name: None,
315            event,
316            threshold_pct,
317            handler,
318        });
319    });
320}
321
322pub fn register_step_hook(
323    persona_pattern: impl Into<String>,
324    step_name: impl Into<String>,
325    event: HookEvent,
326    threshold_pct: Option<f64>,
327    handler: Rc<VmClosure>,
328) {
329    PERSONA_HOOKS.with(|hooks| {
330        hooks.borrow_mut().push(PersonaHookRegistration {
331            persona_pattern: persona_pattern.into(),
332            step_name: Some(step_name.into()),
333            event,
334            threshold_pct,
335            handler,
336        });
337    });
338}
339
340pub fn clear_persona_hooks() {
341    PERSONA_HOOKS.with(|hooks| hooks.borrow_mut().clear());
342}
343
/// Opaque copy of the active step and persona stacks, produced by
/// [`take_active_context`] and consumed by [`restore_active_context`].
pub struct ActiveContextSnapshot {
    // Contents of `STEP_STACK` at the time of the snapshot.
    steps: Vec<ActiveStep>,
    // Contents of `PERSONA_STACK` at the time of the snapshot.
    personas: Vec<ActivePersona>,
}
348
349pub fn take_active_context() -> ActiveContextSnapshot {
350    ActiveContextSnapshot {
351        steps: STEP_STACK.with(|stack| std::mem::take(&mut *stack.borrow_mut())),
352        personas: PERSONA_STACK.with(|stack| std::mem::take(&mut *stack.borrow_mut())),
353    }
354}
355
356pub fn restore_active_context(snapshot: ActiveContextSnapshot) {
357    STEP_STACK.with(|stack| *stack.borrow_mut() = snapshot.steps);
358    PERSONA_STACK.with(|stack| *stack.borrow_mut() = snapshot.personas);
359}
360
361pub fn is_tracked_function(function_name: &str) -> bool {
362    STEP_REGISTRY.with(|registry| registry.borrow().contains_key(function_name))
363        || PERSONA_REGISTRY.with(|registry| registry.borrow().contains_key(function_name))
364}
365
366pub fn step_definition_for_function(function_name: &str) -> Option<Rc<StepDefinition>> {
367    STEP_REGISTRY.with(|registry| registry.borrow().get(function_name).cloned())
368}
369
370pub fn current_persona_name() -> Option<String> {
371    PERSONA_STACK.with(|stack| stack.borrow().last().map(|p| p.definition.name.clone()))
372}
373
374fn persona_matches(pattern: &str, persona: &str) -> bool {
375    crate::orchestration::glob_match(pattern, persona)
376}
377
378pub fn matching_hooks(
379    event: HookEvent,
380    persona: Option<&str>,
381    step_name: Option<&str>,
382    budget_pct: Option<f64>,
383) -> Vec<PersonaHookInvocation> {
384    let persona = persona.unwrap_or("");
385    PERSONA_HOOKS.with(|hooks| {
386        hooks
387            .borrow()
388            .iter()
389            .filter(|hook| hook.event == event)
390            .filter(|hook| persona_matches(&hook.persona_pattern, persona))
391            .filter(|hook| match (&hook.step_name, step_name) {
392                (Some(expected), Some(actual)) => expected == actual,
393                (Some(_), None) => false,
394                (None, _) => true,
395            })
396            .filter(|hook| match (hook.threshold_pct, budget_pct) {
397                (Some(threshold), Some(pct)) => pct >= threshold,
398                (Some(_), None) => false,
399                (None, _) => true,
400            })
401            .map(|hook| PersonaHookInvocation {
402                handler: hook.handler.clone(),
403                event: hook.event,
404            })
405            .collect()
406    })
407}
408
409pub fn maybe_push_active_persona(function_name: &str, frame_depth: usize) -> bool {
410    let definition =
411        PERSONA_REGISTRY.with(|registry| registry.borrow().get(function_name).cloned());
412    let Some(definition) = definition else {
413        return false;
414    };
415    PERSONA_STACK.with(|stack| {
416        stack.borrow_mut().push(ActivePersona {
417            frame_depth,
418            definition,
419        });
420    });
421    true
422}
423
424/// Push an active step onto the stack iff `function_name` has metadata
425/// registered. Returns `true` when a frame was pushed so the call site
426/// can record that fact. Called from `Vm::push_closure_frame` after the
427/// new frame has been added.
428pub fn maybe_push_active_step(function_name: &str, frame_depth: usize, args: &[VmValue]) -> bool {
429    let definition = STEP_REGISTRY.with(|registry| registry.borrow().get(function_name).cloned());
430    let Some(definition) = definition else {
431        return false;
432    };
433    let persona = current_persona_name();
434    let span_id =
435        crate::tracing::span_start(crate::tracing::SpanKind::Step, definition.name.clone());
436    if let Some(persona_name) = persona.as_deref() {
437        crate::tracing::span_set_metadata(
438            span_id,
439            "persona",
440            serde_json::Value::String(persona_name.to_string()),
441        );
442    }
443    if let Some(model) = definition.model.as_deref() {
444        crate::tracing::span_set_metadata(
445            span_id,
446            "model",
447            serde_json::Value::String(model.to_string()),
448        );
449    }
450    STEP_STACK.with(|stack| {
451        stack.borrow_mut().push(ActiveStep::new(
452            frame_depth,
453            definition,
454            persona,
455            args.to_vec(),
456            span_id,
457        ));
458    });
459    true
460}
461
462/// Drop any step entries whose owning frame has already been unwound,
463/// recording a `CompletedStep` summary for each. The `current_frame_depth`
464/// is `Vm::frames.len()` at the call site — entries with
465/// `frame_depth > current_frame_depth` are stale.
466pub fn prune_below_frame(current_frame_depth: usize) {
467    let mut popped: Vec<ActiveStep> = Vec::new();
468    STEP_STACK.with(|stack| {
469        let mut stack = stack.borrow_mut();
470        while let Some(top) = stack.last() {
471            if top.frame_depth > current_frame_depth {
472                popped.push(stack.pop().unwrap());
473            } else {
474                break;
475            }
476        }
477    });
478    for step in popped {
479        finish_step(step, "completed", None);
480    }
481    PERSONA_STACK.with(|stack| {
482        let mut stack = stack.borrow_mut();
483        while stack
484            .last()
485            .is_some_and(|persona| persona.frame_depth > current_frame_depth)
486        {
487            stack.pop();
488        }
489    });
490}
491
492pub fn take_active_step(current_frame_depth: usize) -> Option<ActiveStep> {
493    STEP_STACK.with(|stack| {
494        let mut stack = stack.borrow_mut();
495        if stack
496            .last()
497            .is_some_and(|step| step.frame_depth == current_frame_depth)
498        {
499            stack.pop()
500        } else {
501            None
502        }
503    })
504}
505
506pub fn finish_active_step(step: ActiveStep, status: &str, error: Option<String>) {
507    finish_step(step, status, error);
508}
509
510/// Pop the topmost active step (if its frame is the current one) and
511/// record an explicit completion status. Used when an error boundary
512/// rewrites or absorbs an in-flight error so the receipt log reflects the
513/// outcome the persona actually saw.
514pub fn pop_and_record(current_frame_depth: usize, status: &str, error: Option<String>) -> bool {
515    let popped = STEP_STACK.with(|stack| {
516        let mut stack = stack.borrow_mut();
517        if stack
518            .last()
519            .map(|step| step.frame_depth == current_frame_depth)
520            .unwrap_or(false)
521        {
522            stack.pop()
523        } else {
524            None
525        }
526    });
527    let Some(step) = popped else {
528        return false;
529    };
530    finish_step(step, status, error);
531    true
532}
533
534fn finish_step(step: ActiveStep, status: &str, error: Option<String>) {
535    crate::tracing::span_set_metadata(
536        step.span_id,
537        "status",
538        serde_json::Value::String(status.to_string()),
539    );
540    crate::tracing::span_set_metadata(
541        step.span_id,
542        "llm_calls",
543        serde_json::Value::Number(step.llm_calls.into()),
544    );
545    crate::tracing::span_set_metadata(
546        step.span_id,
547        "input_tokens",
548        serde_json::Value::Number(step.input_tokens.into()),
549    );
550    crate::tracing::span_set_metadata(
551        step.span_id,
552        "output_tokens",
553        serde_json::Value::Number(step.output_tokens.into()),
554    );
555    if let Some(cost_n) = serde_json::Number::from_f64(step.cost_usd) {
556        crate::tracing::span_set_metadata(
557            step.span_id,
558            "cost_usd",
559            serde_json::Value::Number(cost_n),
560        );
561    }
562    crate::tracing::span_end(step.span_id);
563    let summary = CompletedStep {
564        name: step.definition.name.clone(),
565        function: step.definition.function.clone(),
566        model: step
567            .last_model
568            .clone()
569            .or_else(|| step.definition.model.clone()),
570        input_tokens: step.input_tokens,
571        output_tokens: step.output_tokens,
572        cost_usd: step.cost_usd,
573        llm_calls: step.llm_calls,
574        status: status.to_string(),
575        error,
576    };
577    COMPLETED_STEPS.with(|completed| completed.borrow_mut().push(summary));
578}
579
580/// Get a snapshot of the topmost active step, if any. Used by the
581/// llm_call path to fill in defaults — never for mutation.
582pub fn with_active_step<R>(f: impl FnOnce(&ActiveStep) -> R) -> Option<R> {
583    STEP_STACK.with(|stack| stack.borrow().last().map(f))
584}
585
586/// Mutate the topmost active step (typically to attribute LLM usage).
587pub fn with_active_step_mut<R>(f: impl FnOnce(&mut ActiveStep) -> R) -> Option<R> {
588    STEP_STACK.with(|stack| stack.borrow_mut().last_mut().map(f))
589}
590
591/// Frame depth of the topmost active step, or `None` when no step is
592/// active. Used by `handle_error` to detect "this throw is exiting a
593/// step's frame".
594pub fn active_step_frame_depth() -> Option<usize> {
595    STEP_STACK.with(|stack| stack.borrow().last().map(|s| s.frame_depth))
596}
597
598/// Default model the topmost active step should impose on `llm_call`
599/// invocations whose options dict didn't pin a model.
600pub fn active_step_model_default() -> Option<String> {
601    STEP_STACK.with(|stack| {
602        stack
603            .borrow()
604            .last()
605            .and_then(|step| step.definition.model.clone())
606    })
607}
608
609/// Record that `llm_call` consumed `input_tokens` / `output_tokens` for
610/// `cost_usd`. Updates the active step's running totals and returns a
611/// budget-exhaustion error if the step's ceiling is now breached.
612///
613/// The check is performed AFTER the call so the test fixture's first
614/// call (which fits under budget) succeeds and subsequent calls trip the
615/// limit. This matches the existing `accumulate_cost_for_provider`
616/// pattern where global budget is also checked post-hoc.
617pub fn record_step_llm_usage(
618    model: &str,
619    input_tokens: i64,
620    output_tokens: i64,
621    cost_usd: f64,
622) -> Result<(), VmError> {
623    let exhausted = STEP_STACK.with(|stack| -> Option<VmError> {
624        let mut stack = stack.borrow_mut();
625        let step = stack.last_mut()?;
626        step.input_tokens = step.input_tokens.saturating_add(input_tokens.max(0) as u64);
627        step.output_tokens = step
628            .output_tokens
629            .saturating_add(output_tokens.max(0) as u64);
630        step.cost_usd += cost_usd;
631        step.llm_calls = step.llm_calls.saturating_add(1);
632        if !model.is_empty() {
633            step.last_model = Some(model.to_string());
634        }
635
636        if let Some(max_tokens) = step.definition.max_tokens {
637            if step.total_tokens() > max_tokens {
638                return Some(budget_exhausted_error(
639                    &step.definition,
640                    "max_tokens",
641                    max_tokens as f64,
642                    step.total_tokens() as f64,
643                    step.cost_usd,
644                ));
645            }
646        }
647        if let Some(max_usd) = step.definition.max_usd {
648            if step.cost_usd > max_usd {
649                return Some(budget_exhausted_error(
650                    &step.definition,
651                    "max_usd",
652                    max_usd,
653                    step.total_tokens() as f64,
654                    step.cost_usd,
655                ));
656            }
657        }
658        None
659    });
660    if let Some(err) = exhausted {
661        return Err(err);
662    }
663    Ok(())
664}
665
666fn budget_exhausted_error(
667    definition: &StepDefinition,
668    limit: &str,
669    limit_value: f64,
670    consumed_tokens: f64,
671    consumed_cost_usd: f64,
672) -> VmError {
673    let mut dict: BTreeMap<String, VmValue> = BTreeMap::new();
674    dict.insert(
675        "category".to_string(),
676        VmValue::String(Rc::from("budget_exceeded")),
677    );
678    dict.insert(
679        "kind".to_string(),
680        VmValue::String(Rc::from("budget_exhausted")),
681    );
682    dict.insert(
683        "reason".to_string(),
684        VmValue::String(Rc::from("step_budget_exhausted")),
685    );
686    dict.insert(
687        "step".to_string(),
688        VmValue::String(Rc::from(definition.name.clone())),
689    );
690    dict.insert(
691        "function".to_string(),
692        VmValue::String(Rc::from(definition.function.clone())),
693    );
694    dict.insert(
695        "limit".to_string(),
696        VmValue::String(Rc::from(limit.to_string())),
697    );
698    dict.insert("limit_value".to_string(), VmValue::Float(limit_value));
699    dict.insert(
700        "consumed_tokens".to_string(),
701        VmValue::Float(consumed_tokens),
702    );
703    dict.insert(
704        "consumed_cost_usd".to_string(),
705        VmValue::Float(consumed_cost_usd),
706    );
707    dict.insert(
708        "error_boundary".to_string(),
709        VmValue::String(Rc::from(
710            definition
711                .error_boundary
712                .clone()
713                .unwrap_or_else(|| "fail".to_string()),
714        )),
715    );
716    dict.insert(
717        "message".to_string(),
718        VmValue::String(Rc::from(format!(
719            "step `{}` exceeded {} budget ({} > {})",
720            definition.name, limit, consumed_tokens as i64, limit_value as i64
721        ))),
722    );
723    VmError::Thrown(VmValue::Dict(Rc::new(dict)))
724}
725
726/// Returns true if the thrown value looks like a budget-exhausted
727/// error — either our typed step-budget dict or the existing
728/// `crates/harn-vm/src/llm/cost.rs::budget_exceeded_error` shape.
729/// Either form is treated identically by `error_boundary` because the
730/// per-step budget machinery layers onto the existing envelope; a step
731/// whose budget the preflight projection rejects is still a budget
732/// exhaustion the step authored.
733pub fn is_step_budget_exhausted(err: &VmError) -> bool {
734    let VmError::Thrown(VmValue::Dict(dict)) = err else {
735        return false;
736    };
737    let category = dict.get("category").and_then(vm_str);
738    let kind = dict.get("kind").and_then(vm_str);
739    let reason = dict.get("reason").and_then(vm_str);
740    if matches!(kind, Some("budget_exhausted")) && matches!(reason, Some("step_budget_exhausted")) {
741        return true;
742    }
743    matches!(category, Some("budget_exceeded"))
744}
745
746/// Annotate an existing budget-exhausted error with `escalated: true`
747/// and the step's identity so the persona body / handoff receiver can
748/// route on it. Returns the original error if it isn't a thrown dict.
749/// Ensures `step` and `function` keys reflect the just-finished step
750/// even when the underlying error was raised by the preflight budget
751/// machinery (which doesn't know which step it's running under).
752pub fn mark_escalated(err: VmError, step_name: Option<&str>, function: Option<&str>) -> VmError {
753    let VmError::Thrown(VmValue::Dict(dict)) = err else {
754        return err;
755    };
756    let mut next = (*dict).clone();
757    next.insert("escalated".to_string(), VmValue::Bool(true));
758    next.insert(
759        "category".to_string(),
760        VmValue::String(Rc::from("handoff_escalation")),
761    );
762    if let Some(step) = step_name {
763        next.entry("step".to_string())
764            .or_insert_with(|| VmValue::String(Rc::from(step.to_string())));
765    }
766    if let Some(function) = function {
767        next.entry("function".to_string())
768            .or_insert_with(|| VmValue::String(Rc::from(function.to_string())));
769    }
770    VmError::Thrown(VmValue::Dict(Rc::new(next)))
771}
772
773/// Drain the completed-step log. Used by receipt builders that want a
774/// per-step model + token + cost breakdown for the just-finished run.
775pub fn drain_completed_steps() -> Vec<CompletedStep> {
776    COMPLETED_STEPS.with(|completed| std::mem::take(&mut *completed.borrow_mut()))
777}
778
779/// Read the completed-step log without clearing it. Use when callers
780/// want a peek without disturbing the global record stream.
781pub fn peek_completed_steps() -> Vec<CompletedStep> {
782    COMPLETED_STEPS.with(|completed| completed.borrow().clone())
783}
784
785/// Lower a [`CompletedStep`] into JSON for embedding in receipts /
786/// inspect output.
787pub fn completed_step_to_json(step: &CompletedStep) -> JsonValue {
788    serde_json::to_value(step).unwrap_or(JsonValue::Null)
789}
790
791/// Register the `__register_step` host builtin. Compiler-emitted
792/// bytecode after every `@step` declaration calls it with
793/// `(function_name, metadata_dict)` so the runtime can later dispatch on
794/// the step's metadata when its function is invoked.
795pub fn register_step_builtins(vm: &mut crate::vm::Vm) {
796    vm.register_builtin("__register_step", |args, _out| {
797        register_step_from_dict(args.to_vec())
798    });
799    vm.register_builtin("__register_persona", |args, _out| {
800        register_persona_from_dict(args.to_vec())
801    });
802}
803
#[cfg(test)]
mod tests {
    use super::*;

    // Start each test from empty thread-local state so registrations and
    // completed-step records from another test cannot leak in.
    fn fresh_state() {
        reset_thread_local_state();
    }

    // End-to-end walk through the step lifecycle: register from a metadata
    // dict, push, record usage under and over budget, prune, and drain.
    #[test]
    fn registers_and_pops_step_from_dict() {
        fresh_state();
        // Budget: 100 tokens and $0.05.
        let mut budget: BTreeMap<String, VmValue> = BTreeMap::new();
        budget.insert("max_tokens".to_string(), VmValue::Int(100));
        budget.insert("max_usd".to_string(), VmValue::Float(0.05));
        let mut meta: BTreeMap<String, VmValue> = BTreeMap::new();
        meta.insert("name".to_string(), VmValue::String(Rc::from("plan")));
        meta.insert(
            "model".to_string(),
            VmValue::String(Rc::from("claude-haiku-4-5")),
        );
        meta.insert(
            "error_boundary".to_string(),
            VmValue::String(Rc::from("continue")),
        );
        meta.insert("budget".to_string(), VmValue::Dict(Rc::new(budget)));

        register_step_from_dict(vec![
            VmValue::String(Rc::from("plan_step")),
            VmValue::Dict(Rc::new(meta)),
        ])
        .expect("registration succeeds");

        // Pushing a registered function makes it the active step and
        // exposes its declared model as the default.
        assert!(maybe_push_active_step("plan_step", 3, &[]));
        assert_eq!(active_step_frame_depth(), Some(3));
        assert_eq!(
            active_step_model_default().as_deref(),
            Some("claude-haiku-4-5")
        );

        // First call: 30 tokens total, well under the 100-token ceiling.
        record_step_llm_usage("claude-haiku-4-5", 10, 20, 0.001).expect("under budget");
        with_active_step(|step| {
            assert_eq!(step.input_tokens, 10);
            assert_eq!(step.output_tokens, 20);
            assert!((step.cost_usd - 0.001).abs() < 1e-9);
        });

        // Second call brings the total to 130 tokens, exceeding the ceiling.
        let err =
            record_step_llm_usage("claude-haiku-4-5", 50, 50, 0.0).expect_err("should exhaust");
        assert!(is_step_budget_exhausted(&err));

        // Dropping below the step's frame depth finishes it; both calls
        // (including the one that tripped the budget) are counted.
        prune_below_frame(2);
        let completed = drain_completed_steps();
        assert_eq!(completed.len(), 1);
        assert_eq!(completed[0].llm_calls, 2);
    }

    // A function with no registered metadata must not create an active step.
    #[test]
    fn unregistered_function_does_not_push() {
        fresh_state();
        assert!(!maybe_push_active_step("not_a_step", 1, &[]));
        assert!(active_step_frame_depth().is_none());
    }
}
866}