Skip to main content

harn_vm/
value.rs

1use std::collections::BTreeMap;
2use std::rc::Rc;
3use std::sync::atomic::{AtomicBool, AtomicI64, Ordering};
4use std::sync::Arc;
5
6use crate::chunk::CompiledFunction;
7use crate::mcp::VmMcpClientHandle;
8
9/// An async builtin function for the VM.
10pub type VmAsyncBuiltinFn = Rc<
11    dyn Fn(
12        Vec<VmValue>,
13    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<VmValue, VmError>>>>,
14>;
15
16/// The raw join handle type for spawned tasks.
17pub type VmJoinHandle = tokio::task::JoinHandle<Result<(VmValue, String), VmError>>;
18
19/// A spawned async task handle with cancellation support.
20pub struct VmTaskHandle {
21    pub handle: VmJoinHandle,
22    /// Cooperative cancellation token. Set to true to request graceful shutdown.
23    pub cancel_token: Arc<AtomicBool>,
24}
25
26/// A channel handle for the VM (uses tokio mpsc).
27#[derive(Debug, Clone)]
28pub struct VmChannelHandle {
29    pub name: String,
30    pub sender: Arc<tokio::sync::mpsc::Sender<VmValue>>,
31    pub receiver: Arc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
32    pub closed: Arc<AtomicBool>,
33}
34
/// Shared atomic 64-bit integer exposed to VM programs.
///
/// Clones refer to the same underlying counter.
#[derive(Debug, Clone)]
pub struct VmAtomicHandle {
    /// The reference-counted atomic cell.
    pub value: Arc<AtomicI64>,
}
40
41/// A generator object: lazily produces values via yield.
42/// The generator body runs as a spawned task that sends values through a channel.
43#[derive(Debug, Clone)]
44pub struct VmGenerator {
45    /// Whether the generator has finished (returned or exhausted).
46    pub done: Rc<std::cell::Cell<bool>>,
47    /// Receiver end of the yield channel (generator sends values here).
48    /// Wrapped in a shared async mutex so recv() can be called without holding
49    /// a RefCell borrow across await points.
50    pub receiver: Rc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
51}
52
53/// VM runtime value.
54#[derive(Debug, Clone)]
55pub enum VmValue {
56    Int(i64),
57    Float(f64),
58    String(Rc<str>),
59    Bool(bool),
60    Nil,
61    List(Rc<Vec<VmValue>>),
62    Dict(Rc<BTreeMap<String, VmValue>>),
63    Closure(Rc<VmClosure>),
64    Duration(u64),
65    EnumVariant {
66        enum_name: String,
67        variant: String,
68        fields: Vec<VmValue>,
69    },
70    StructInstance {
71        struct_name: String,
72        fields: BTreeMap<String, VmValue>,
73    },
74    TaskHandle(String),
75    Channel(VmChannelHandle),
76    Atomic(VmAtomicHandle),
77    McpClient(VmMcpClientHandle),
78    Set(Rc<Vec<VmValue>>),
79    Generator(VmGenerator),
80}
81
82/// A compiled closure value.
83#[derive(Debug, Clone)]
84pub struct VmClosure {
85    pub func: CompiledFunction,
86    pub env: VmEnv,
87}
88
89/// VM environment for variable storage.
90#[derive(Debug, Clone)]
91pub struct VmEnv {
92    pub(crate) scopes: Vec<Scope>,
93}
94
95#[derive(Debug, Clone)]
96pub(crate) struct Scope {
97    pub(crate) vars: BTreeMap<String, (VmValue, bool)>, // (value, mutable)
98}
99
100impl Default for VmEnv {
101    fn default() -> Self {
102        Self::new()
103    }
104}
105
106impl VmEnv {
107    pub fn new() -> Self {
108        Self {
109            scopes: vec![Scope {
110                vars: BTreeMap::new(),
111            }],
112        }
113    }
114
115    pub fn push_scope(&mut self) {
116        self.scopes.push(Scope {
117            vars: BTreeMap::new(),
118        });
119    }
120
121    pub fn get(&self, name: &str) -> Option<VmValue> {
122        for scope in self.scopes.iter().rev() {
123            if let Some((val, _)) = scope.vars.get(name) {
124                return Some(val.clone());
125            }
126        }
127        None
128    }
129
130    pub fn define(&mut self, name: &str, value: VmValue, mutable: bool) -> Result<(), VmError> {
131        if let Some(scope) = self.scopes.last_mut() {
132            if let Some((_, existing_mutable)) = scope.vars.get(name) {
133                if !existing_mutable && !mutable {
134                    return Err(VmError::Runtime(format!(
135                        "Cannot redeclare immutable variable '{name}' in the same scope (use 'var' for mutable bindings)"
136                    )));
137                }
138            }
139            scope.vars.insert(name.to_string(), (value, mutable));
140        }
141        Ok(())
142    }
143
144    pub fn all_variables(&self) -> BTreeMap<String, VmValue> {
145        let mut vars = BTreeMap::new();
146        for scope in &self.scopes {
147            for (name, (value, _)) in &scope.vars {
148                vars.insert(name.clone(), value.clone());
149            }
150        }
151        vars
152    }
153
154    pub fn assign(&mut self, name: &str, value: VmValue) -> Result<(), VmError> {
155        for scope in self.scopes.iter_mut().rev() {
156            if let Some((_, mutable)) = scope.vars.get(name) {
157                if !mutable {
158                    return Err(VmError::ImmutableAssignment(name.to_string()));
159                }
160                scope.vars.insert(name.to_string(), (value, true));
161                return Ok(());
162            }
163        }
164        Err(VmError::UndefinedVariable(name.to_string()))
165    }
166}
167
/// Computes the Levenshtein edit distance between `a` and `b`.
///
/// The distance is measured in `char`s: it is the minimum number of
/// single-character insertions, deletions and substitutions needed to turn
/// one string into the other.
fn levenshtein(a: &str, b: &str) -> usize {
    let source: Vec<char> = a.chars().collect();
    let target: Vec<char> = b.chars().collect();

    // One rolling row of the DP table: before processing a source character,
    // `row[j]` is the distance between the source prefix seen so far and
    // `target[..j]`.
    let mut row: Vec<usize> = (0..=target.len()).collect();
    for (i, &source_ch) in source.iter().enumerate() {
        // Value of the previous row at column j (the "diagonal" neighbour).
        let mut diagonal = row[0];
        row[0] = i + 1;
        for (j, &target_ch) in target.iter().enumerate() {
            let above = row[j + 1];
            let substitution = diagonal + usize::from(source_ch != target_ch);
            row[j + 1] = (above + 1).min(row[j] + 1).min(substitution);
            diagonal = above;
        }
    }
    row[target.len()]
}
186
187/// Find the closest match from a list of candidates using Levenshtein distance.
188/// Returns `Some(suggestion)` if a candidate is within `max_dist` edits.
189pub fn closest_match<'a>(name: &str, candidates: impl Iterator<Item = &'a str>) -> Option<String> {
190    let max_dist = match name.len() {
191        0..=2 => 1,
192        3..=5 => 2,
193        _ => 3,
194    };
195    candidates
196        .filter(|c| *c != name && !c.starts_with("__"))
197        .map(|c| (c, levenshtein(name, c)))
198        .filter(|(_, d)| *d <= max_dist)
199        // Prefer smallest distance, then closest length to original, then alphabetical
200        .min_by(|(a, da), (b, db)| {
201            da.cmp(db)
202                .then_with(|| {
203                    let a_diff = (a.len() as isize - name.len() as isize).unsigned_abs();
204                    let b_diff = (b.len() as isize - name.len() as isize).unsigned_abs();
205                    a_diff.cmp(&b_diff)
206                })
207                .then_with(|| a.cmp(b))
208        })
209        .map(|(c, _)| c.to_string())
210}
211
212#[derive(Debug, Clone)]
213pub enum VmError {
214    StackUnderflow,
215    StackOverflow,
216    UndefinedVariable(String),
217    UndefinedBuiltin(String),
218    ImmutableAssignment(String),
219    TypeError(String),
220    Runtime(String),
221    DivisionByZero,
222    Thrown(VmValue),
223    /// Thrown with error category for structured error handling.
224    CategorizedError {
225        message: String,
226        category: ErrorCategory,
227    },
228    Return(VmValue),
229    InvalidInstruction(u8),
230}
231
/// Coarse classification of errors, used for structured error handling in
/// agent orchestration.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorCategory {
    /// A network or connection timeout.
    Timeout,
    /// Authentication or authorization failed.
    Auth,
    /// A rate limit was exceeded.
    RateLimit,
    /// A tool failed while executing.
    ToolError,
    /// The operation was cancelled.
    Cancelled,
    /// The requested resource does not exist.
    NotFound,
    /// The circuit breaker is open.
    CircuitOpen,
    /// Anything that fits no other category.
    Generic,
}
252
253impl ErrorCategory {
254    pub fn as_str(&self) -> &'static str {
255        match self {
256            ErrorCategory::Timeout => "timeout",
257            ErrorCategory::Auth => "auth",
258            ErrorCategory::RateLimit => "rate_limit",
259            ErrorCategory::ToolError => "tool_error",
260            ErrorCategory::Cancelled => "cancelled",
261            ErrorCategory::NotFound => "not_found",
262            ErrorCategory::CircuitOpen => "circuit_open",
263            ErrorCategory::Generic => "generic",
264        }
265    }
266
267    pub fn parse(s: &str) -> Self {
268        match s {
269            "timeout" => ErrorCategory::Timeout,
270            "auth" => ErrorCategory::Auth,
271            "rate_limit" => ErrorCategory::RateLimit,
272            "tool_error" => ErrorCategory::ToolError,
273            "cancelled" => ErrorCategory::Cancelled,
274            "not_found" => ErrorCategory::NotFound,
275            "circuit_open" => ErrorCategory::CircuitOpen,
276            _ => ErrorCategory::Generic,
277        }
278    }
279}
280
281/// Create a categorized error conveniently.
282pub fn categorized_error(message: impl Into<String>, category: ErrorCategory) -> VmError {
283    VmError::CategorizedError {
284        message: message.into(),
285        category,
286    }
287}
288
289/// Extract error category from a VmError.
290///
291/// Classification priority:
292/// 1. Explicit CategorizedError variant (set by throw_error or internal code)
293/// 2. Thrown dict with a "category" field (user-created structured errors)
294/// 3. HTTP status code extraction (standard, unambiguous)
295/// 4. Deadline exceeded (VM-internal)
296/// 5. Fallback to Generic
297pub fn error_to_category(err: &VmError) -> ErrorCategory {
298    match err {
299        VmError::CategorizedError { category, .. } => category.clone(),
300        VmError::Thrown(VmValue::Dict(d)) => d
301            .get("category")
302            .map(|v| ErrorCategory::parse(&v.display()))
303            .unwrap_or(ErrorCategory::Generic),
304        VmError::Thrown(VmValue::String(s)) => classify_error_message(s),
305        VmError::Runtime(msg) => classify_error_message(msg),
306        _ => ErrorCategory::Generic,
307    }
308}
309
310/// Classify an error message using HTTP status codes and well-known patterns.
311/// Prefers unambiguous signals (status codes) over substring heuristics.
312fn classify_error_message(msg: &str) -> ErrorCategory {
313    // 1. HTTP status codes — most reliable signal
314    if let Some(cat) = classify_by_http_status(msg) {
315        return cat;
316    }
317    // 2. Well-known error identifiers from major APIs
318    //    (Anthropic, OpenAI, and standard HTTP patterns)
319    if msg.contains("Deadline exceeded") || msg.contains("context deadline exceeded") {
320        return ErrorCategory::Timeout;
321    }
322    if msg.contains("overloaded_error") || msg.contains("api_error") {
323        // Anthropic-specific error types
324        return ErrorCategory::RateLimit;
325    }
326    if msg.contains("insufficient_quota") || msg.contains("billing_hard_limit_reached") {
327        // OpenAI-specific error types
328        return ErrorCategory::RateLimit;
329    }
330    if msg.contains("invalid_api_key") || msg.contains("authentication_error") {
331        return ErrorCategory::Auth;
332    }
333    if msg.contains("not_found_error") || msg.contains("model_not_found") {
334        return ErrorCategory::NotFound;
335    }
336    if msg.contains("circuit_open") {
337        return ErrorCategory::CircuitOpen;
338    }
339    ErrorCategory::Generic
340}
341
342/// Classify errors by HTTP status code if one appears in the message.
343/// This is the most reliable classification method since status codes
344/// are standardized (RFC 9110) and unambiguous.
345fn classify_by_http_status(msg: &str) -> Option<ErrorCategory> {
346    // Extract 3-digit HTTP status codes from common patterns:
347    // "HTTP 429", "status 429", "429 Too Many", "error: 401"
348    for code in extract_http_status_codes(msg) {
349        return Some(match code {
350            401 | 403 => ErrorCategory::Auth,
351            404 | 410 => ErrorCategory::NotFound,
352            408 | 504 | 522 | 524 => ErrorCategory::Timeout,
353            429 | 503 => ErrorCategory::RateLimit,
354            _ => continue,
355        });
356    }
357    None
358}
359
/// Collects plausible HTTP error status codes from `msg`, in order of
/// appearance.
///
/// A code is a run of exactly three ASCII digits (not preceded or followed by
/// another digit) whose value lies in `400..=599`.
fn extract_http_status_codes(msg: &str) -> Vec<u16> {
    let bytes = msg.as_bytes();
    let digit_at = |idx: usize| bytes.get(idx).map_or(false, |b| b.is_ascii_digit());

    bytes
        .windows(3)
        .enumerate()
        .filter(|(_, triple)| triple.iter().all(|b| b.is_ascii_digit()))
        // Reject triples that are part of a longer number.
        .filter(|&(start, _)| {
            let digit_before = start > 0 && digit_at(start - 1);
            let digit_after = digit_at(start + 3);
            !digit_before && !digit_after
        })
        .map(|(_, triple)| {
            triple
                .iter()
                .fold(0u16, |acc, digit| acc * 10 + u16::from(digit - b'0'))
        })
        .filter(|code| (400..=599).contains(code))
        .collect()
}
384
385impl std::fmt::Display for VmError {
386    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
387        match self {
388            VmError::StackUnderflow => write!(f, "Stack underflow"),
389            VmError::StackOverflow => write!(f, "Stack overflow: too many nested calls"),
390            VmError::UndefinedVariable(n) => write!(f, "Undefined variable: {n}"),
391            VmError::UndefinedBuiltin(n) => write!(f, "Undefined builtin: {n}"),
392            VmError::ImmutableAssignment(n) => {
393                write!(f, "Cannot assign to immutable binding: {n}")
394            }
395            VmError::TypeError(msg) => write!(f, "Type error: {msg}"),
396            VmError::Runtime(msg) => write!(f, "Runtime error: {msg}"),
397            VmError::DivisionByZero => write!(f, "Division by zero"),
398            VmError::Thrown(v) => write!(f, "Thrown: {}", v.display()),
399            VmError::CategorizedError { message, category } => {
400                write!(f, "Error [{}]: {}", category.as_str(), message)
401            }
402            VmError::Return(_) => write!(f, "Return from function"),
403            VmError::InvalidInstruction(op) => write!(f, "Invalid instruction: 0x{op:02x}"),
404        }
405    }
406}
407
408impl std::error::Error for VmError {}
409
410impl VmValue {
411    pub fn is_truthy(&self) -> bool {
412        match self {
413            VmValue::Bool(b) => *b,
414            VmValue::Nil => false,
415            VmValue::Int(n) => *n != 0,
416            VmValue::Float(n) => *n != 0.0,
417            VmValue::String(s) => !s.is_empty(),
418            VmValue::List(l) => !l.is_empty(),
419            VmValue::Dict(d) => !d.is_empty(),
420            VmValue::Closure(_) => true,
421            VmValue::Duration(ms) => *ms > 0,
422            VmValue::EnumVariant { .. } => true,
423            VmValue::StructInstance { .. } => true,
424            VmValue::TaskHandle(_) => true,
425            VmValue::Channel(_) => true,
426            VmValue::Atomic(_) => true,
427            VmValue::McpClient(_) => true,
428            VmValue::Set(s) => !s.is_empty(),
429            VmValue::Generator(_) => true,
430        }
431    }
432
433    pub fn type_name(&self) -> &'static str {
434        match self {
435            VmValue::String(_) => "string",
436            VmValue::Int(_) => "int",
437            VmValue::Float(_) => "float",
438            VmValue::Bool(_) => "bool",
439            VmValue::Nil => "nil",
440            VmValue::List(_) => "list",
441            VmValue::Dict(_) => "dict",
442            VmValue::Closure(_) => "closure",
443            VmValue::Duration(_) => "duration",
444            VmValue::EnumVariant { .. } => "enum",
445            VmValue::StructInstance { .. } => "struct",
446            VmValue::TaskHandle(_) => "task_handle",
447            VmValue::Channel(_) => "channel",
448            VmValue::Atomic(_) => "atomic",
449            VmValue::McpClient(_) => "mcp_client",
450            VmValue::Set(_) => "set",
451            VmValue::Generator(_) => "generator",
452        }
453    }
454
455    pub fn display(&self) -> String {
456        match self {
457            VmValue::Int(n) => n.to_string(),
458            VmValue::Float(n) => {
459                if *n == (*n as i64) as f64 && n.abs() < 1e15 {
460                    format!("{:.1}", n)
461                } else {
462                    n.to_string()
463                }
464            }
465            VmValue::String(s) => s.to_string(),
466            VmValue::Bool(b) => (if *b { "true" } else { "false" }).to_string(),
467            VmValue::Nil => "nil".to_string(),
468            VmValue::List(items) => {
469                let inner: Vec<String> = items.iter().map(|i| i.display()).collect();
470                format!("[{}]", inner.join(", "))
471            }
472            VmValue::Dict(map) => {
473                let inner: Vec<String> = map
474                    .iter()
475                    .map(|(k, v)| format!("{k}: {}", v.display()))
476                    .collect();
477                format!("{{{}}}", inner.join(", "))
478            }
479            VmValue::Closure(c) => format!("<fn({})>", c.func.params.join(", ")),
480            VmValue::Duration(ms) => {
481                if *ms >= 3_600_000 && ms % 3_600_000 == 0 {
482                    format!("{}h", ms / 3_600_000)
483                } else if *ms >= 60_000 && ms % 60_000 == 0 {
484                    format!("{}m", ms / 60_000)
485                } else if *ms >= 1000 && ms % 1000 == 0 {
486                    format!("{}s", ms / 1000)
487                } else {
488                    format!("{}ms", ms)
489                }
490            }
491            VmValue::EnumVariant {
492                enum_name,
493                variant,
494                fields,
495            } => {
496                if fields.is_empty() {
497                    format!("{enum_name}.{variant}")
498                } else {
499                    let inner: Vec<String> = fields.iter().map(|v| v.display()).collect();
500                    format!("{enum_name}.{variant}({})", inner.join(", "))
501                }
502            }
503            VmValue::StructInstance {
504                struct_name,
505                fields,
506            } => {
507                let inner: Vec<String> = fields
508                    .iter()
509                    .map(|(k, v)| format!("{k}: {}", v.display()))
510                    .collect();
511                format!("{struct_name} {{{}}}", inner.join(", "))
512            }
513            VmValue::TaskHandle(id) => format!("<task:{id}>"),
514            VmValue::Channel(ch) => format!("<channel:{}>", ch.name),
515            VmValue::Atomic(a) => format!("<atomic:{}>", a.value.load(Ordering::SeqCst)),
516            VmValue::McpClient(c) => format!("<mcp_client:{}>", c.name),
517            VmValue::Set(items) => {
518                let inner: Vec<String> = items.iter().map(|i| i.display()).collect();
519                format!("set({})", inner.join(", "))
520            }
521            VmValue::Generator(g) => {
522                if g.done.get() {
523                    "<generator (done)>".to_string()
524                } else {
525                    "<generator>".to_string()
526                }
527            }
528        }
529    }
530
531    /// Get the value as a BTreeMap reference, if it's a Dict.
532    pub fn as_dict(&self) -> Option<&BTreeMap<String, VmValue>> {
533        if let VmValue::Dict(d) = self {
534            Some(d)
535        } else {
536            None
537        }
538    }
539
540    pub fn as_int(&self) -> Option<i64> {
541        if let VmValue::Int(n) = self {
542            Some(*n)
543        } else {
544            None
545        }
546    }
547}
548
549/// Sync builtin function for the VM.
550pub type VmBuiltinFn = Rc<dyn Fn(&[VmValue], &mut String) -> Result<VmValue, VmError>>;
551
552pub fn values_equal(a: &VmValue, b: &VmValue) -> bool {
553    match (a, b) {
554        (VmValue::Int(x), VmValue::Int(y)) => x == y,
555        (VmValue::Float(x), VmValue::Float(y)) => x == y,
556        (VmValue::String(x), VmValue::String(y)) => x == y,
557        (VmValue::Bool(x), VmValue::Bool(y)) => x == y,
558        (VmValue::Nil, VmValue::Nil) => true,
559        (VmValue::Int(x), VmValue::Float(y)) => (*x as f64) == *y,
560        (VmValue::Float(x), VmValue::Int(y)) => *x == (*y as f64),
561        (VmValue::TaskHandle(a), VmValue::TaskHandle(b)) => a == b,
562        (VmValue::Channel(_), VmValue::Channel(_)) => false, // channels are never equal
563        (VmValue::Atomic(a), VmValue::Atomic(b)) => {
564            a.value.load(Ordering::SeqCst) == b.value.load(Ordering::SeqCst)
565        }
566        (VmValue::List(a), VmValue::List(b)) => {
567            a.len() == b.len() && a.iter().zip(b.iter()).all(|(x, y)| values_equal(x, y))
568        }
569        (VmValue::Dict(a), VmValue::Dict(b)) => {
570            a.len() == b.len()
571                && a.iter()
572                    .zip(b.iter())
573                    .all(|((k1, v1), (k2, v2))| k1 == k2 && values_equal(v1, v2))
574        }
575        (
576            VmValue::EnumVariant {
577                enum_name: a_e,
578                variant: a_v,
579                fields: a_f,
580            },
581            VmValue::EnumVariant {
582                enum_name: b_e,
583                variant: b_v,
584                fields: b_f,
585            },
586        ) => {
587            a_e == b_e
588                && a_v == b_v
589                && a_f.len() == b_f.len()
590                && a_f.iter().zip(b_f.iter()).all(|(x, y)| values_equal(x, y))
591        }
592        (
593            VmValue::StructInstance {
594                struct_name: a_s,
595                fields: a_f,
596            },
597            VmValue::StructInstance {
598                struct_name: b_s,
599                fields: b_f,
600            },
601        ) => {
602            a_s == b_s
603                && a_f.len() == b_f.len()
604                && a_f
605                    .iter()
606                    .zip(b_f.iter())
607                    .all(|((k1, v1), (k2, v2))| k1 == k2 && values_equal(v1, v2))
608        }
609        (VmValue::Set(a), VmValue::Set(b)) => {
610            a.len() == b.len() && a.iter().all(|x| b.iter().any(|y| values_equal(x, y)))
611        }
612        (VmValue::Generator(_), VmValue::Generator(_)) => false, // generators are never equal
613        _ => false,
614    }
615}
616
617pub fn compare_values(a: &VmValue, b: &VmValue) -> i32 {
618    match (a, b) {
619        (VmValue::Int(x), VmValue::Int(y)) => x.cmp(y) as i32,
620        (VmValue::Float(x), VmValue::Float(y)) => {
621            if x < y {
622                -1
623            } else if x > y {
624                1
625            } else {
626                0
627            }
628        }
629        (VmValue::Int(x), VmValue::Float(y)) => {
630            let x = *x as f64;
631            if x < *y {
632                -1
633            } else if x > *y {
634                1
635            } else {
636                0
637            }
638        }
639        (VmValue::Float(x), VmValue::Int(y)) => {
640            let y = *y as f64;
641            if *x < y {
642                -1
643            } else if *x > y {
644                1
645            } else {
646                0
647            }
648        }
649        (VmValue::String(x), VmValue::String(y)) => x.cmp(y) as i32,
650        _ => 0,
651    }
652}