harn_vm/
value.rs

1use std::collections::BTreeMap;
2use std::rc::Rc;
3use std::sync::atomic::{AtomicBool, AtomicI64, Ordering};
4use std::sync::Arc;
5use std::{cell::RefCell, path::PathBuf};
6
7use crate::chunk::CompiledFunction;
8use crate::mcp::VmMcpClientHandle;
9
/// An async builtin function for the VM.
///
/// The callable receives its arguments by value and returns a pinned, boxed
/// future that resolves to the builtin's result or a [`VmError`]. The
/// callable is reference-counted with `Rc` and the future carries no `Send`
/// bound, so neither can be moved across threads.
pub type VmAsyncBuiltinFn = Rc<
    dyn Fn(
        Vec<VmValue>,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<VmValue, VmError>>>>,
>;
16
/// The raw join handle type for spawned tasks.
///
/// Awaiting the handle yields the task's outcome: a `(VmValue, String)` pair
/// on success or a [`VmError`] on failure.
// NOTE(review): the meaning of the `String` half of the pair is not visible
// in this file section (presumably output captured by the task) — confirm
// against the spawn site.
pub type VmJoinHandle = tokio::task::JoinHandle<Result<(VmValue, String), VmError>>;
19
/// A spawned async task handle with cancellation support.
pub struct VmTaskHandle {
    /// Join handle used to await the task's `Result<(VmValue, String), VmError>`.
    pub handle: VmJoinHandle,
    /// Cooperative cancellation token. Set to true to request graceful shutdown.
    pub cancel_token: Arc<AtomicBool>,
}
26
/// A channel handle for the VM (uses tokio mpsc).
///
/// Every field is behind an `Arc` (or is a plain `String`), so cloning the
/// handle yields another handle to the same underlying channel.
#[derive(Debug, Clone)]
pub struct VmChannelHandle {
    /// Channel name; shown in the value's display form as `<channel:NAME>`.
    pub name: String,
    /// Sending half of the tokio mpsc channel.
    pub sender: Arc<tokio::sync::mpsc::Sender<VmValue>>,
    /// Receiving half, guarded by an async mutex so it can be shared between clones.
    pub receiver: Arc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
    /// Shared closed flag.
    // NOTE(review): presumably set to true once the channel is closed — the
    // code that writes it is not visible here; confirm.
    pub closed: Arc<AtomicBool>,
}
35
/// An atomic integer handle for the VM.
///
/// Cloning the handle clones the `Arc`, so all clones refer to the same
/// underlying integer.
#[derive(Debug, Clone)]
pub struct VmAtomicHandle {
    /// The shared 64-bit signed atomic integer.
    pub value: Arc<AtomicI64>,
}
41
42/// A lazy integer range — Python-style. Stores only `(start, end, inclusive)`
43/// so the in-memory footprint is O(1) regardless of the range's length.
44/// `len()`, indexing (`r[k]`), `.contains(x)`, `.first()`, `.last()` are all
45/// O(1); direct iteration walks step-by-step without materializing a list.
46///
47/// Empty-range convention (Python-consistent):
48/// - Inclusive empty when `start > end`.
49/// - Exclusive empty when `start >= end`.
50///
51/// Negative / reversed ranges are NOT supported in v1: `5 to 1` is simply
52/// empty. Authors who want reverse iteration should call `.to_list().reverse()`.
53#[derive(Debug, Clone, Copy)]
54pub struct VmRange {
55    pub start: i64,
56    pub end: i64,
57    pub inclusive: bool,
58}
59
60impl VmRange {
61    /// Number of elements this range yields.
62    ///
63    /// Uses saturating arithmetic so that pathological ranges near
64    /// `i64::MAX`/`i64::MIN` do not panic on overflow. Because a range's
65    /// element count must fit in `i64` the returned length saturates at
66    /// `i64::MAX` for ranges whose width exceeds that (e.g. `i64::MIN to
67    /// i64::MAX` inclusive). Callers that later narrow to `usize` for
68    /// allocation should still guard against huge lengths — see
69    /// `to_vec` / `get` for the indexable-range invariants.
70    pub fn len(&self) -> i64 {
71        if self.inclusive {
72            if self.start > self.end {
73                0
74            } else {
75                self.end.saturating_sub(self.start).saturating_add(1)
76            }
77        } else if self.start >= self.end {
78            0
79        } else {
80            self.end.saturating_sub(self.start)
81        }
82    }
83
84    pub fn is_empty(&self) -> bool {
85        self.len() == 0
86    }
87
88    /// Element at the given 0-based index, bounds-checked.
89    /// Returns `None` when out of bounds or when `start + idx` would
90    /// overflow (which can only happen when `len()` saturated).
91    pub fn get(&self, idx: i64) -> Option<i64> {
92        if idx < 0 || idx >= self.len() {
93            None
94        } else {
95            self.start.checked_add(idx)
96        }
97    }
98
99    /// First element or `None` when empty.
100    pub fn first(&self) -> Option<i64> {
101        if self.is_empty() {
102            None
103        } else {
104            Some(self.start)
105        }
106    }
107
108    /// Last element or `None` when empty.
109    pub fn last(&self) -> Option<i64> {
110        if self.is_empty() {
111            None
112        } else if self.inclusive {
113            Some(self.end)
114        } else {
115            Some(self.end - 1)
116        }
117    }
118
119    /// Whether `v` falls inside the range (O(1)).
120    pub fn contains(&self, v: i64) -> bool {
121        if self.is_empty() {
122            return false;
123        }
124        if self.inclusive {
125            v >= self.start && v <= self.end
126        } else {
127            v >= self.start && v < self.end
128        }
129    }
130
131    /// Materialize to a `Vec<VmValue>` — the explicit escape hatch.
132    ///
133    /// Uses `checked_add` on the per-element index so a range near
134    /// `i64::MAX` stops at the representable bound instead of panicking.
135    /// Callers should still treat a very long range as unwise to
136    /// materialize (the whole point of `VmRange` is to avoid this).
137    pub fn to_vec(&self) -> Vec<VmValue> {
138        let len = self.len();
139        if len <= 0 {
140            return Vec::new();
141        }
142        let cap = len as usize;
143        let mut out = Vec::with_capacity(cap);
144        for i in 0..len {
145            match self.start.checked_add(i) {
146                Some(v) => out.push(VmValue::Int(v)),
147                None => break,
148            }
149        }
150        out
151    }
152}
153
/// A generator object: lazily produces values via yield.
/// The generator body runs as a spawned task that sends values through a channel.
///
/// Both fields are `Rc`-shared, so cloning a `VmGenerator` yields another
/// handle to the same generator state rather than an independent copy.
#[derive(Debug, Clone)]
pub struct VmGenerator {
    /// Whether the generator has finished (returned or exhausted).
    pub done: Rc<std::cell::Cell<bool>>,
    /// Receiver end of the yield channel (generator sends values here).
    /// Wrapped in a shared async mutex so recv() can be called without holding
    /// a RefCell borrow across await points.
    pub receiver: Rc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
}
165
/// VM runtime value.
///
/// Collection and string payloads are reference-counted (`Rc`), so cloning
/// those variants bumps a refcount instead of deep-copying the contents.
#[derive(Debug, Clone)]
pub enum VmValue {
    /// 64-bit signed integer.
    Int(i64),
    /// 64-bit floating-point number.
    Float(f64),
    /// Immutable, reference-counted string.
    String(Rc<str>),
    /// Boolean.
    Bool(bool),
    /// The absence of a value; displayed as `nil`.
    Nil,
    /// Ordered list of values.
    List(Rc<Vec<VmValue>>),
    /// String-keyed dictionary, iterated in sorted key order (`BTreeMap`).
    Dict(Rc<BTreeMap<String, VmValue>>),
    /// A compiled closure together with its captured environment.
    Closure(Rc<VmClosure>),
    /// Reference to a registered builtin function, used when a builtin name is
    /// referenced as a value (e.g. `snake_dict.rekey(snake_to_camel)`). The
    /// contained string is the builtin's registered name.
    BuiltinRef(Rc<str>),
    /// A duration in milliseconds (displayed as `h` / `m` / `s` / `ms`).
    Duration(u64),
    /// An enum variant value, displayed as `Enum.Variant` or
    /// `Enum.Variant(fields...)`.
    EnumVariant {
        enum_name: String,
        variant: String,
        fields: Vec<VmValue>,
    },
    /// A struct instance with named fields.
    StructInstance {
        struct_name: String,
        fields: BTreeMap<String, VmValue>,
    },
    /// Handle to a spawned task, identified by a string id (displayed as `<task:ID>`).
    TaskHandle(String),
    /// Handle to a VM channel.
    Channel(VmChannelHandle),
    /// Handle to a shared atomic integer.
    Atomic(VmAtomicHandle),
    /// Handle to an MCP client.
    McpClient(VmMcpClientHandle),
    /// A set, stored as a `Vec` of its elements.
    // NOTE(review): uniqueness is presumably enforced by the code that builds
    // or mutates sets — not visible here; confirm.
    Set(Rc<Vec<VmValue>>),
    /// A generator object that lazily yields values.
    Generator(VmGenerator),
    /// A lazy integer range.
    Range(VmRange),
    /// Lazy iterator handle. Single-pass, fused. See `crate::vm::iter::VmIter`.
    Iter(Rc<RefCell<crate::vm::iter::VmIter>>),
    /// Two-element pair value. Produced by `pair(a, b)`, yielded by the
    /// Dict iterator source, and (later) by `zip` / `enumerate` combinators.
    /// Accessed via `.first` / `.second`, and destructurable in
    /// `for (a, b) in ...` loops.
    Pair(Rc<(VmValue, VmValue)>),
}
206
/// A compiled closure value.
///
/// Bundles the compiled function with the environment it captured and the
/// optional module-level context it was created in.
#[derive(Debug, Clone)]
pub struct VmClosure {
    /// The compiled function body and metadata (its `params` are used when
    /// displaying the closure).
    pub func: CompiledFunction,
    /// Per-closure lexical snapshot of the defining environment.
    pub env: VmEnv,
    /// Source directory for this closure's originating module.
    /// When set, `render()` and other source-relative builtins resolve
    /// paths relative to this directory instead of the entry pipeline.
    pub source_dir: Option<PathBuf>,
    /// Module-local named functions that should resolve before builtin fallback.
    /// This lets selectively imported functions keep private sibling helpers
    /// without exporting them into the caller's environment.
    pub module_functions: Option<ModuleFunctionRegistry>,
    /// Shared, mutable module-level env: holds top-level `var` / `let`
    /// bindings declared at the module root (caches, counters, lazily
    /// initialized registries). All closures created from the same
    /// module import point at the same `Rc<RefCell<VmEnv>>`, so a
    /// mutation inside one function is visible to every other function
    /// in that module on subsequent calls. `closure.env` still holds
    /// the per-closure lexical snapshot (captured function args from
    /// enclosing scopes, etc.) and is unchanged by this — `module_state`
    /// is a separate lookup layer consulted after the local env and
    /// before globals. Created in `import_declarations` after the
    /// module's init chunk runs, so the initial values from `var x = ...`
    /// land in it.
    pub module_state: Option<ModuleState>,
}
234
/// Shared, mutable table of a module's named functions, keyed by function name.
pub type ModuleFunctionRegistry = Rc<RefCell<BTreeMap<String, Rc<VmClosure>>>>;
/// Shared, mutable environment holding a module's top-level bindings.
pub type ModuleState = Rc<RefCell<VmEnv>>;
237
238/// VM environment for variable storage.
239#[derive(Debug, Clone)]
240pub struct VmEnv {
241    pub(crate) scopes: Vec<Scope>,
242}
243
244#[derive(Debug, Clone)]
245pub(crate) struct Scope {
246    pub(crate) vars: BTreeMap<String, (VmValue, bool)>, // (value, mutable)
247}
248
249impl Default for VmEnv {
250    fn default() -> Self {
251        Self::new()
252    }
253}
254
255impl VmEnv {
256    pub fn new() -> Self {
257        Self {
258            scopes: vec![Scope {
259                vars: BTreeMap::new(),
260            }],
261        }
262    }
263
264    pub fn push_scope(&mut self) {
265        self.scopes.push(Scope {
266            vars: BTreeMap::new(),
267        });
268    }
269
270    pub fn pop_scope(&mut self) {
271        if self.scopes.len() > 1 {
272            self.scopes.pop();
273        }
274    }
275
276    pub fn scope_depth(&self) -> usize {
277        self.scopes.len()
278    }
279
280    pub fn truncate_scopes(&mut self, target_depth: usize) {
281        let min_depth = target_depth.max(1);
282        while self.scopes.len() > min_depth {
283            self.scopes.pop();
284        }
285    }
286
287    pub fn get(&self, name: &str) -> Option<VmValue> {
288        for scope in self.scopes.iter().rev() {
289            if let Some((val, _)) = scope.vars.get(name) {
290                return Some(val.clone());
291            }
292        }
293        None
294    }
295
296    pub fn define(&mut self, name: &str, value: VmValue, mutable: bool) -> Result<(), VmError> {
297        if let Some(scope) = self.scopes.last_mut() {
298            if let Some((_, existing_mutable)) = scope.vars.get(name) {
299                if !existing_mutable && !mutable {
300                    return Err(VmError::Runtime(format!(
301                        "Cannot redeclare immutable variable '{name}' in the same scope (use 'var' for mutable bindings)"
302                    )));
303                }
304            }
305            scope.vars.insert(name.to_string(), (value, mutable));
306        }
307        Ok(())
308    }
309
310    pub fn all_variables(&self) -> BTreeMap<String, VmValue> {
311        let mut vars = BTreeMap::new();
312        for scope in &self.scopes {
313            for (name, (value, _)) in &scope.vars {
314                vars.insert(name.clone(), value.clone());
315            }
316        }
317        vars
318    }
319
320    pub fn assign(&mut self, name: &str, value: VmValue) -> Result<(), VmError> {
321        for scope in self.scopes.iter_mut().rev() {
322            if let Some((_, mutable)) = scope.vars.get(name) {
323                if !mutable {
324                    return Err(VmError::ImmutableAssignment(name.to_string()));
325                }
326                scope.vars.insert(name.to_string(), (value, true));
327                return Ok(());
328            }
329        }
330        Err(VmError::UndefinedVariable(name.to_string()))
331    }
332
333    /// Debugger-only variant of `assign` that rebinds the name even if
334    /// the existing binding was declared with `let`. Pipeline authors
335    /// overwhelmingly use `let`, so a strict mutability check would
336    /// make the DAP `setVariable` request useless for "what-if"
337    /// iteration — which is the whole point of the feature. Preserves
338    /// the original mutability flag so the VM's runtime behavior is
339    /// unchanged after the debugger overrides.
340    pub fn assign_debug(&mut self, name: &str, value: VmValue) -> Result<(), VmError> {
341        for scope in self.scopes.iter_mut().rev() {
342            if let Some((_, mutable)) = scope.vars.get(name) {
343                let mutable = *mutable;
344                scope.vars.insert(name.to_string(), (value, mutable));
345                return Ok(());
346            }
347        }
348        Err(VmError::UndefinedVariable(name.to_string()))
349    }
350}
351
/// Compute the Levenshtein edit distance between two strings.
///
/// The distance is measured in Unicode scalar values (`char`s), not bytes,
/// using the classic two-row dynamic-programming table.
fn levenshtein(a: &str, b: &str) -> usize {
    let a: Vec<char> = a.chars().collect();
    let b: Vec<char> = b.chars().collect();
    let (m, n) = (a.len(), b.len());
    // `prev` is the row for the previous prefix of `a`; row 0 is the cost of
    // building each prefix of `b` from the empty string.
    let mut prev = (0..=n).collect::<Vec<_>>();
    let mut curr = vec![0; n + 1];
    for i in 1..=m {
        curr[0] = i;
        for j in 1..=n {
            let cost = usize::from(a[i - 1] != b[j - 1]);
            curr[j] = (prev[j] + 1).min(curr[j - 1] + 1).min(prev[j - 1] + cost);
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    prev[n]
}

/// Find the closest match from a list of candidates using Levenshtein distance.
///
/// Returns `Some(suggestion)` if a candidate is within the edit budget, which
/// scales with the length of `name`: 1 edit for names of up to 2 characters,
/// 2 edits for 3–5 characters, and 3 edits otherwise. Candidates equal to
/// `name` or starting with `__` are never suggested.
///
/// Lengths are counted in `char`s so they agree with the unit `levenshtein`
/// measures in; counting bytes would inflate the budget for non-ASCII names
/// and let unrelated candidates through.
pub fn closest_match<'a>(name: &str, candidates: impl Iterator<Item = &'a str>) -> Option<String> {
    let name_len = name.chars().count();
    let max_dist = match name_len {
        0..=2 => 1,
        3..=5 => 2,
        _ => 3,
    };
    let length_gap =
        |candidate: &str| (candidate.chars().count() as isize - name_len as isize).unsigned_abs();
    candidates
        .filter(|c| *c != name && !c.starts_with("__"))
        .map(|c| (c, levenshtein(name, c)))
        .filter(|(_, d)| *d <= max_dist)
        // Prefer smallest distance, then closest length to original, then alphabetical
        .min_by(|(a, da), (b, db)| {
            da.cmp(db)
                .then_with(|| length_gap(a).cmp(&length_gap(b)))
                .then_with(|| a.cmp(b))
        })
        .map(|(c, _)| c.to_string())
}
394
/// Errors raised while executing VM code.
///
/// The `Display` implementation renders each variant as a human-readable
/// message.
#[derive(Debug, Clone)]
pub enum VmError {
    /// Stack underflow.
    StackUnderflow,
    /// Stack overflow: too many nested calls.
    StackOverflow,
    /// A variable was referenced or assigned but is not defined; carries its name.
    UndefinedVariable(String),
    /// A builtin was referenced but is not registered; carries its name.
    UndefinedBuiltin(String),
    /// Assignment to an immutable binding; carries the binding's name.
    ImmutableAssignment(String),
    /// Type error with a descriptive message.
    TypeError(String),
    /// Generic runtime error with a descriptive message.
    Runtime(String),
    /// Division by zero.
    DivisionByZero,
    /// A value thrown by user code.
    Thrown(VmValue),
    /// Thrown with error category for structured error handling.
    CategorizedError {
        message: String,
        category: ErrorCategory,
    },
    /// Carries a function's return value.
    // NOTE(review): presumably a control-flow signal for unwinding out of a
    // function rather than a real failure — confirm against the interpreter loop.
    Return(VmValue),
    /// An instruction byte that is not valid; carries the byte.
    InvalidInstruction(u8),
}
414
/// Error categories for structured error handling in agent orchestration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ErrorCategory {
    /// Network/connection timeout
    Timeout,
    /// Authentication/authorization failure
    Auth,
    /// Rate limit exceeded (HTTP 429 / quota)
    RateLimit,
    /// Upstream provider is overloaded (HTTP 503 / 529).
    /// Distinct from RateLimit: the client hasn't exceeded a quota — the
    /// provider is shedding load and will recover on its own.
    Overloaded,
    /// Provider-side 5xx error (500, 502) that isn't specifically overload.
    ServerError,
    /// Network-level transient failure (connection reset, DNS hiccup,
    /// partial stream) — retryable but not provider-status-coded.
    TransientNetwork,
    /// LLM output failed schema validation. Retryable via `schema_retries`.
    SchemaValidation,
    /// Tool execution failure
    ToolError,
    /// Tool was rejected by the host (not permitted / not in allowlist)
    ToolRejected,
    /// Operation was cancelled
    Cancelled,
    /// Resource not found
    NotFound,
    /// Circuit breaker is open
    CircuitOpen,
    /// Generic/unclassified error
    Generic,
}

impl ErrorCategory {
    /// Stable snake_case identifier for this category.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Timeout => "timeout",
            Self::Auth => "auth",
            Self::RateLimit => "rate_limit",
            Self::Overloaded => "overloaded",
            Self::ServerError => "server_error",
            Self::TransientNetwork => "transient_network",
            Self::SchemaValidation => "schema_validation",
            Self::ToolError => "tool_error",
            Self::ToolRejected => "tool_rejected",
            Self::Cancelled => "cancelled",
            Self::NotFound => "not_found",
            Self::CircuitOpen => "circuit_open",
            Self::Generic => "generic",
        }
    }

    /// Inverse of [`as_str`](Self::as_str): maps an identifier back to its
    /// category. Any unrecognized string yields [`ErrorCategory::Generic`].
    pub fn parse(s: &str) -> Self {
        // Every category that has a dedicated identifier; `Generic` is the
        // fallback and therefore not listed.
        const NAMED: [ErrorCategory; 12] = [
            ErrorCategory::Timeout,
            ErrorCategory::Auth,
            ErrorCategory::RateLimit,
            ErrorCategory::Overloaded,
            ErrorCategory::ServerError,
            ErrorCategory::TransientNetwork,
            ErrorCategory::SchemaValidation,
            ErrorCategory::ToolError,
            ErrorCategory::ToolRejected,
            ErrorCategory::Cancelled,
            ErrorCategory::NotFound,
            ErrorCategory::CircuitOpen,
        ];
        NAMED
            .iter()
            .find(|category| category.as_str() == s)
            .cloned()
            .unwrap_or(Self::Generic)
    }

    /// Whether an error of this category is worth retrying for a transient
    /// provider-side reason. Agent loops consult this to decide whether to
    /// back off and retry vs surface the error to the user.
    pub fn is_transient(&self) -> bool {
        match self {
            Self::Timeout
            | Self::RateLimit
            | Self::Overloaded
            | Self::ServerError
            | Self::TransientNetwork => true,
            Self::Auth
            | Self::SchemaValidation
            | Self::ToolError
            | Self::ToolRejected
            | Self::Cancelled
            | Self::NotFound
            | Self::CircuitOpen
            | Self::Generic => false,
        }
    }
}
500
501/// Create a categorized error conveniently.
502pub fn categorized_error(message: impl Into<String>, category: ErrorCategory) -> VmError {
503    VmError::CategorizedError {
504        message: message.into(),
505        category,
506    }
507}
508
509/// Extract error category from a VmError.
510///
511/// Classification priority:
512/// 1. Explicit CategorizedError variant (set by throw_error or internal code)
513/// 2. Thrown dict with a "category" field (user-created structured errors)
514/// 3. HTTP status code extraction (standard, unambiguous)
515/// 4. Deadline exceeded (VM-internal)
516/// 5. Fallback to Generic
517pub fn error_to_category(err: &VmError) -> ErrorCategory {
518    match err {
519        VmError::CategorizedError { category, .. } => category.clone(),
520        VmError::Thrown(VmValue::Dict(d)) => d
521            .get("category")
522            .map(|v| ErrorCategory::parse(&v.display()))
523            .unwrap_or(ErrorCategory::Generic),
524        VmError::Thrown(VmValue::String(s)) => classify_error_message(s),
525        VmError::Runtime(msg) => classify_error_message(msg),
526        _ => ErrorCategory::Generic,
527    }
528}
529
530/// Classify an error message using HTTP status codes and well-known patterns.
531/// Prefers unambiguous signals (status codes) over substring heuristics.
532pub fn classify_error_message(msg: &str) -> ErrorCategory {
533    // 1. HTTP status codes — most reliable signal
534    if let Some(cat) = classify_by_http_status(msg) {
535        return cat;
536    }
537    // 2. Well-known error identifiers from major APIs
538    //    (Anthropic, OpenAI, and standard HTTP patterns)
539    if msg.contains("Deadline exceeded") || msg.contains("context deadline exceeded") {
540        return ErrorCategory::Timeout;
541    }
542    if msg.contains("overloaded_error") {
543        // Anthropic overloaded_error surfaces as HTTP 529.
544        return ErrorCategory::Overloaded;
545    }
546    if msg.contains("api_error") {
547        // Anthropic catch-all server-side error.
548        return ErrorCategory::ServerError;
549    }
550    if msg.contains("insufficient_quota") || msg.contains("billing_hard_limit_reached") {
551        // OpenAI-specific quota error types.
552        return ErrorCategory::RateLimit;
553    }
554    if msg.contains("invalid_api_key") || msg.contains("authentication_error") {
555        return ErrorCategory::Auth;
556    }
557    if msg.contains("not_found_error") || msg.contains("model_not_found") {
558        return ErrorCategory::NotFound;
559    }
560    if msg.contains("circuit_open") {
561        return ErrorCategory::CircuitOpen;
562    }
563    // Network-level transient patterns (pre-HTTP-status, pre-provider-framing).
564    let lower = msg.to_lowercase();
565    if lower.contains("connection reset")
566        || lower.contains("connection refused")
567        || lower.contains("connection closed")
568        || lower.contains("broken pipe")
569        || lower.contains("dns error")
570        || lower.contains("stream error")
571        || lower.contains("unexpected eof")
572    {
573        return ErrorCategory::TransientNetwork;
574    }
575    ErrorCategory::Generic
576}
577
578/// Classify errors by HTTP status code if one appears in the message.
579/// This is the most reliable classification method since status codes
580/// are standardized (RFC 9110) and unambiguous.
581fn classify_by_http_status(msg: &str) -> Option<ErrorCategory> {
582    // Extract 3-digit HTTP status codes from common patterns:
583    // "HTTP 429", "status 429", "429 Too Many", "error: 401"
584    for code in extract_http_status_codes(msg) {
585        return Some(match code {
586            401 | 403 => ErrorCategory::Auth,
587            404 | 410 => ErrorCategory::NotFound,
588            408 | 504 | 522 | 524 => ErrorCategory::Timeout,
589            429 => ErrorCategory::RateLimit,
590            503 | 529 => ErrorCategory::Overloaded,
591            500 | 502 => ErrorCategory::ServerError,
592            _ => continue,
593        });
594    }
595    None
596}
597
/// Extract plausible HTTP error status codes from an error message.
///
/// A candidate is a run of exactly three ASCII digits (not adjacent to any
/// other digit) whose value lies in `400..=599`. Candidates are returned in
/// the order they occur in `msg`.
fn extract_http_status_codes(msg: &str) -> Vec<u16> {
    let bytes = msg.as_bytes();
    bytes
        .windows(3)
        .enumerate()
        .filter_map(|(start, digits)| {
            if !digits.iter().all(u8::is_ascii_digit) {
                return None;
            }
            // Reject windows that are part of a longer number.
            let digit_before = start > 0 && bytes[start - 1].is_ascii_digit();
            let digit_after = bytes
                .get(start + 3)
                .map_or(false, u8::is_ascii_digit);
            if digit_before || digit_after {
                return None;
            }
            // Three decimal digits always fit in a u16 (max 999).
            let code = digits
                .iter()
                .fold(0u16, |acc, digit| acc * 10 + u16::from(digit - b'0'));
            Some(code)
        })
        .filter(|code| (400..=599).contains(code))
        .collect()
}
622
623impl std::fmt::Display for VmError {
624    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
625        match self {
626            VmError::StackUnderflow => write!(f, "Stack underflow"),
627            VmError::StackOverflow => write!(f, "Stack overflow: too many nested calls"),
628            VmError::UndefinedVariable(n) => write!(f, "Undefined variable: {n}"),
629            VmError::UndefinedBuiltin(n) => write!(f, "Undefined builtin: {n}"),
630            VmError::ImmutableAssignment(n) => {
631                write!(f, "Cannot assign to immutable binding: {n}")
632            }
633            VmError::TypeError(msg) => write!(f, "Type error: {msg}"),
634            VmError::Runtime(msg) => write!(f, "Runtime error: {msg}"),
635            VmError::DivisionByZero => write!(f, "Division by zero"),
636            VmError::Thrown(v) => write!(f, "Thrown: {}", v.display()),
637            VmError::CategorizedError { message, category } => {
638                write!(f, "Error [{}]: {}", category.as_str(), message)
639            }
640            VmError::Return(_) => write!(f, "Return from function"),
641            VmError::InvalidInstruction(op) => write!(f, "Invalid instruction: 0x{op:02x}"),
642        }
643    }
644}
645
646impl std::error::Error for VmError {}
647
648impl VmValue {
649    pub fn is_truthy(&self) -> bool {
650        match self {
651            VmValue::Bool(b) => *b,
652            VmValue::Nil => false,
653            VmValue::Int(n) => *n != 0,
654            VmValue::Float(n) => *n != 0.0,
655            VmValue::String(s) => !s.is_empty(),
656            VmValue::List(l) => !l.is_empty(),
657            VmValue::Dict(d) => !d.is_empty(),
658            VmValue::Closure(_) => true,
659            VmValue::BuiltinRef(_) => true,
660            VmValue::Duration(ms) => *ms > 0,
661            VmValue::EnumVariant { .. } => true,
662            VmValue::StructInstance { .. } => true,
663            VmValue::TaskHandle(_) => true,
664            VmValue::Channel(_) => true,
665            VmValue::Atomic(_) => true,
666            VmValue::McpClient(_) => true,
667            VmValue::Set(s) => !s.is_empty(),
668            VmValue::Generator(_) => true,
669            // Match Python semantics: range objects are always truthy,
670            // even the empty range (analogous to generators / iterators).
671            VmValue::Range(_) => true,
672            VmValue::Iter(_) => true,
673            VmValue::Pair(_) => true,
674        }
675    }
676
677    pub fn type_name(&self) -> &'static str {
678        match self {
679            VmValue::String(_) => "string",
680            VmValue::Int(_) => "int",
681            VmValue::Float(_) => "float",
682            VmValue::Bool(_) => "bool",
683            VmValue::Nil => "nil",
684            VmValue::List(_) => "list",
685            VmValue::Dict(_) => "dict",
686            VmValue::Closure(_) => "closure",
687            VmValue::BuiltinRef(_) => "builtin",
688            VmValue::Duration(_) => "duration",
689            VmValue::EnumVariant { .. } => "enum",
690            VmValue::StructInstance { .. } => "struct",
691            VmValue::TaskHandle(_) => "task_handle",
692            VmValue::Channel(_) => "channel",
693            VmValue::Atomic(_) => "atomic",
694            VmValue::McpClient(_) => "mcp_client",
695            VmValue::Set(_) => "set",
696            VmValue::Generator(_) => "generator",
697            VmValue::Range(_) => "range",
698            VmValue::Iter(_) => "iter",
699            VmValue::Pair(_) => "pair",
700        }
701    }
702
703    pub fn display(&self) -> String {
704        let mut out = String::new();
705        self.write_display(&mut out);
706        out
707    }
708
709    /// Writes the display representation directly into `out`,
710    /// avoiding intermediate Vec<String> allocations for collections.
711    pub fn write_display(&self, out: &mut String) {
712        use std::fmt::Write;
713        match self {
714            VmValue::Int(n) => {
715                let _ = write!(out, "{n}");
716            }
717            VmValue::Float(n) => {
718                if *n == (*n as i64) as f64 && n.abs() < 1e15 {
719                    let _ = write!(out, "{n:.1}");
720                } else {
721                    let _ = write!(out, "{n}");
722                }
723            }
724            VmValue::String(s) => out.push_str(s),
725            VmValue::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
726            VmValue::Nil => out.push_str("nil"),
727            VmValue::List(items) => {
728                out.push('[');
729                for (i, item) in items.iter().enumerate() {
730                    if i > 0 {
731                        out.push_str(", ");
732                    }
733                    item.write_display(out);
734                }
735                out.push(']');
736            }
737            VmValue::Dict(map) => {
738                out.push('{');
739                for (i, (k, v)) in map.iter().enumerate() {
740                    if i > 0 {
741                        out.push_str(", ");
742                    }
743                    out.push_str(k);
744                    out.push_str(": ");
745                    v.write_display(out);
746                }
747                out.push('}');
748            }
749            VmValue::Closure(c) => {
750                let _ = write!(out, "<fn({})>", c.func.params.join(", "));
751            }
752            VmValue::BuiltinRef(name) => {
753                let _ = write!(out, "<builtin {name}>");
754            }
755            VmValue::Duration(ms) => {
756                if *ms >= 3_600_000 && ms % 3_600_000 == 0 {
757                    let _ = write!(out, "{}h", ms / 3_600_000);
758                } else if *ms >= 60_000 && ms % 60_000 == 0 {
759                    let _ = write!(out, "{}m", ms / 60_000);
760                } else if *ms >= 1000 && ms % 1000 == 0 {
761                    let _ = write!(out, "{}s", ms / 1000);
762                } else {
763                    let _ = write!(out, "{}ms", ms);
764                }
765            }
766            VmValue::EnumVariant {
767                enum_name,
768                variant,
769                fields,
770            } => {
771                if fields.is_empty() {
772                    let _ = write!(out, "{enum_name}.{variant}");
773                } else {
774                    let _ = write!(out, "{enum_name}.{variant}(");
775                    for (i, v) in fields.iter().enumerate() {
776                        if i > 0 {
777                            out.push_str(", ");
778                        }
779                        v.write_display(out);
780                    }
781                    out.push(')');
782                }
783            }
784            VmValue::StructInstance {
785                struct_name,
786                fields,
787            } => {
788                let _ = write!(out, "{struct_name} {{");
789                for (i, (k, v)) in fields.iter().enumerate() {
790                    if i > 0 {
791                        out.push_str(", ");
792                    }
793                    out.push_str(k);
794                    out.push_str(": ");
795                    v.write_display(out);
796                }
797                out.push('}');
798            }
799            VmValue::TaskHandle(id) => {
800                let _ = write!(out, "<task:{id}>");
801            }
802            VmValue::Channel(ch) => {
803                let _ = write!(out, "<channel:{}>", ch.name);
804            }
805            VmValue::Atomic(a) => {
806                let _ = write!(out, "<atomic:{}>", a.value.load(Ordering::SeqCst));
807            }
808            VmValue::McpClient(c) => {
809                let _ = write!(out, "<mcp_client:{}>", c.name);
810            }
811            VmValue::Set(items) => {
812                out.push_str("set(");
813                for (i, item) in items.iter().enumerate() {
814                    if i > 0 {
815                        out.push_str(", ");
816                    }
817                    item.write_display(out);
818                }
819                out.push(')');
820            }
821            VmValue::Generator(g) => {
822                if g.done.get() {
823                    out.push_str("<generator (done)>");
824                } else {
825                    out.push_str("<generator>");
826                }
827            }
828            // Print form mirrors source syntax: `1 to 5` / `0 to 3 exclusive`.
829            // `.to_list()` is the explicit path to materialize for display.
830            VmValue::Range(r) => {
831                let _ = write!(out, "{} to {}", r.start, r.end);
832                if !r.inclusive {
833                    out.push_str(" exclusive");
834                }
835            }
836            VmValue::Iter(h) => {
837                if matches!(&*h.borrow(), crate::vm::iter::VmIter::Exhausted) {
838                    out.push_str("<iter (exhausted)>");
839                } else {
840                    out.push_str("<iter>");
841                }
842            }
843            VmValue::Pair(p) => {
844                out.push('(');
845                p.0.write_display(out);
846                out.push_str(", ");
847                p.1.write_display(out);
848                out.push(')');
849            }
850        }
851    }
852
853    /// Get the value as a BTreeMap reference, if it's a Dict.
854    pub fn as_dict(&self) -> Option<&BTreeMap<String, VmValue>> {
855        if let VmValue::Dict(d) = self {
856            Some(d)
857        } else {
858            None
859        }
860    }
861
862    pub fn as_int(&self) -> Option<i64> {
863        if let VmValue::Int(n) = self {
864            Some(*n)
865        } else {
866            None
867        }
868    }
869}
870
/// Sync builtin function for the VM.
///
/// A reference-counted (`Rc`) callable that takes a slice of `VmValue`s
/// (presumably the call arguments) together with a mutable `String`, and
/// returns either the resulting `VmValue` or a `VmError`.
///
/// NOTE(review): the `&mut String` parameter looks like an output buffer the
/// builtin can append to — confirm against the call sites.
pub type VmBuiltinFn = Rc<dyn Fn(&[VmValue], &mut String) -> Result<VmValue, VmError>>;
873
874/// Reference / identity equality. For heap-allocated refcounted values
875/// (List/Dict/Set/Closure) returns true only when both operands share the
876/// same underlying `Rc` allocation. For primitive scalars, falls back to
877/// structural equality (since primitives have no distinct identity).
878pub fn values_identical(a: &VmValue, b: &VmValue) -> bool {
879    match (a, b) {
880        (VmValue::List(x), VmValue::List(y)) => Rc::ptr_eq(x, y),
881        (VmValue::Dict(x), VmValue::Dict(y)) => Rc::ptr_eq(x, y),
882        (VmValue::Set(x), VmValue::Set(y)) => Rc::ptr_eq(x, y),
883        (VmValue::Closure(x), VmValue::Closure(y)) => Rc::ptr_eq(x, y),
884        (VmValue::String(x), VmValue::String(y)) => Rc::ptr_eq(x, y) || x == y,
885        (VmValue::BuiltinRef(x), VmValue::BuiltinRef(y)) => x == y,
886        (VmValue::Pair(x), VmValue::Pair(y)) => Rc::ptr_eq(x, y),
887        // Primitives: identity collapses to structural equality.
888        _ => values_equal(a, b),
889    }
890}
891
892/// Stable identity key for a value. Different allocations produce different
893/// keys; two values with the same heap identity produce the same key. For
894/// primitives the key is derived from the displayed value plus type name so
895/// logically-equal primitives always compare equal.
896pub fn value_identity_key(v: &VmValue) -> String {
897    match v {
898        VmValue::List(x) => format!("list@{:p}", Rc::as_ptr(x)),
899        VmValue::Dict(x) => format!("dict@{:p}", Rc::as_ptr(x)),
900        VmValue::Set(x) => format!("set@{:p}", Rc::as_ptr(x)),
901        VmValue::Closure(x) => format!("closure@{:p}", Rc::as_ptr(x)),
902        VmValue::String(x) => format!("string@{:p}", x.as_ptr()),
903        VmValue::BuiltinRef(name) => format!("builtin@{name}"),
904        other => format!("{}@{}", other.type_name(), other.display()),
905    }
906}
907
908/// Canonical string form used as the keying material for `hash_value`.
909/// Different types never collide (the type name is prepended) and collection
910/// order is preserved so structurally-equal values always produce the same
911/// key. Not intended for cross-process stability; depends on the in-process
912/// iteration order for collections (Dict uses BTreeMap so keys are sorted).
913pub fn value_structural_hash_key(v: &VmValue) -> String {
914    let mut out = String::new();
915    write_structural_hash_key(v, &mut out);
916    out
917}
918
919/// Writes the structural hash key for a value directly into `out`,
920/// avoiding intermediate allocations. Uses length-prefixed encoding
921/// for strings and dict keys to prevent separator collisions.
922fn write_structural_hash_key(v: &VmValue, out: &mut String) {
923    match v {
924        VmValue::Nil => out.push('N'),
925        VmValue::Bool(b) => {
926            out.push(if *b { 'T' } else { 'F' });
927        }
928        VmValue::Int(n) => {
929            out.push('i');
930            out.push_str(&n.to_string());
931            out.push(';');
932        }
933        VmValue::Float(n) => {
934            out.push('f');
935            out.push_str(&n.to_bits().to_string());
936            out.push(';');
937        }
938        VmValue::String(s) => {
939            // Length-prefixed: s<len>:<content> — no ambiguity from content
940            out.push('s');
941            out.push_str(&s.len().to_string());
942            out.push(':');
943            out.push_str(s);
944        }
945        VmValue::Duration(ms) => {
946            out.push('d');
947            out.push_str(&ms.to_string());
948            out.push(';');
949        }
950        VmValue::List(items) => {
951            out.push('L');
952            for item in items.iter() {
953                write_structural_hash_key(item, out);
954                out.push(',');
955            }
956            out.push(']');
957        }
958        VmValue::Dict(map) => {
959            out.push('D');
960            for (k, v) in map.iter() {
961                // Length-prefixed key
962                out.push_str(&k.len().to_string());
963                out.push(':');
964                out.push_str(k);
965                out.push('=');
966                write_structural_hash_key(v, out);
967                out.push(',');
968            }
969            out.push('}');
970        }
971        VmValue::Set(items) => {
972            // Sets need sorted keys for order-independence
973            let mut keys: Vec<String> = items.iter().map(value_structural_hash_key).collect();
974            keys.sort();
975            out.push('S');
976            for k in &keys {
977                out.push_str(k);
978                out.push(',');
979            }
980            out.push('}');
981        }
982        other => {
983            let tn = other.type_name();
984            out.push('o');
985            out.push_str(&tn.len().to_string());
986            out.push(':');
987            out.push_str(tn);
988            let d = other.display();
989            out.push_str(&d.len().to_string());
990            out.push(':');
991            out.push_str(&d);
992        }
993    }
994}
995
996pub fn values_equal(a: &VmValue, b: &VmValue) -> bool {
997    match (a, b) {
998        (VmValue::Int(x), VmValue::Int(y)) => x == y,
999        (VmValue::Float(x), VmValue::Float(y)) => x == y,
1000        (VmValue::String(x), VmValue::String(y)) => x == y,
1001        (VmValue::Bool(x), VmValue::Bool(y)) => x == y,
1002        (VmValue::Nil, VmValue::Nil) => true,
1003        (VmValue::Int(x), VmValue::Float(y)) => (*x as f64) == *y,
1004        (VmValue::Float(x), VmValue::Int(y)) => *x == (*y as f64),
1005        (VmValue::TaskHandle(a), VmValue::TaskHandle(b)) => a == b,
1006        (VmValue::Channel(_), VmValue::Channel(_)) => false, // channels are never equal
1007        (VmValue::Atomic(a), VmValue::Atomic(b)) => {
1008            a.value.load(Ordering::SeqCst) == b.value.load(Ordering::SeqCst)
1009        }
1010        (VmValue::List(a), VmValue::List(b)) => {
1011            a.len() == b.len() && a.iter().zip(b.iter()).all(|(x, y)| values_equal(x, y))
1012        }
1013        (VmValue::Dict(a), VmValue::Dict(b)) => {
1014            a.len() == b.len()
1015                && a.iter()
1016                    .zip(b.iter())
1017                    .all(|((k1, v1), (k2, v2))| k1 == k2 && values_equal(v1, v2))
1018        }
1019        (
1020            VmValue::EnumVariant {
1021                enum_name: a_e,
1022                variant: a_v,
1023                fields: a_f,
1024            },
1025            VmValue::EnumVariant {
1026                enum_name: b_e,
1027                variant: b_v,
1028                fields: b_f,
1029            },
1030        ) => {
1031            a_e == b_e
1032                && a_v == b_v
1033                && a_f.len() == b_f.len()
1034                && a_f.iter().zip(b_f.iter()).all(|(x, y)| values_equal(x, y))
1035        }
1036        (
1037            VmValue::StructInstance {
1038                struct_name: a_s,
1039                fields: a_f,
1040            },
1041            VmValue::StructInstance {
1042                struct_name: b_s,
1043                fields: b_f,
1044            },
1045        ) => {
1046            a_s == b_s
1047                && a_f.len() == b_f.len()
1048                && a_f
1049                    .iter()
1050                    .zip(b_f.iter())
1051                    .all(|((k1, v1), (k2, v2))| k1 == k2 && values_equal(v1, v2))
1052        }
1053        (VmValue::Set(a), VmValue::Set(b)) => {
1054            a.len() == b.len() && a.iter().all(|x| b.iter().any(|y| values_equal(x, y)))
1055        }
1056        (VmValue::Generator(_), VmValue::Generator(_)) => false, // generators are never equal
1057        (VmValue::Range(a), VmValue::Range(b)) => {
1058            a.start == b.start && a.end == b.end && a.inclusive == b.inclusive
1059        }
1060        (VmValue::Iter(a), VmValue::Iter(b)) => Rc::ptr_eq(a, b),
1061        (VmValue::Pair(a), VmValue::Pair(b)) => {
1062            values_equal(&a.0, &b.0) && values_equal(&a.1, &b.1)
1063        }
1064        _ => false,
1065    }
1066}
1067
1068pub fn compare_values(a: &VmValue, b: &VmValue) -> i32 {
1069    match (a, b) {
1070        (VmValue::Int(x), VmValue::Int(y)) => x.cmp(y) as i32,
1071        (VmValue::Float(x), VmValue::Float(y)) => {
1072            if x < y {
1073                -1
1074            } else if x > y {
1075                1
1076            } else {
1077                0
1078            }
1079        }
1080        (VmValue::Int(x), VmValue::Float(y)) => {
1081            let x = *x as f64;
1082            if x < *y {
1083                -1
1084            } else if x > *y {
1085                1
1086            } else {
1087                0
1088            }
1089        }
1090        (VmValue::Float(x), VmValue::Int(y)) => {
1091            let y = *y as f64;
1092            if *x < y {
1093                -1
1094            } else if *x > y {
1095                1
1096            } else {
1097                0
1098            }
1099        }
1100        (VmValue::String(x), VmValue::String(y)) => x.cmp(y) as i32,
1101        (VmValue::Pair(x), VmValue::Pair(y)) => {
1102            let c = compare_values(&x.0, &y.0);
1103            if c != 0 {
1104                c
1105            } else {
1106                compare_values(&x.1, &y.1)
1107            }
1108        }
1109        _ => 0,
1110    }
1111}
1112
#[cfg(test)]
mod tests {
    use super::*;

    // ---- fixture builders ----

    /// String value from a literal.
    fn text(content: &str) -> VmValue {
        VmValue::String(Rc::from(content))
    }

    /// Integer value.
    fn int(number: i64) -> VmValue {
        VmValue::Int(number)
    }

    /// List value owning the given elements.
    fn list_of(elements: Vec<VmValue>) -> VmValue {
        VmValue::List(Rc::new(elements))
    }

    /// Dict value built from `(key, value)` entries.
    fn dict_of(entries: Vec<(&str, VmValue)>) -> VmValue {
        let map = entries
            .into_iter()
            .map(|(name, value)| (name.to_string(), value))
            .collect();
        VmValue::Dict(Rc::new(map))
    }

    /// Shorthand for the function under test.
    fn key_of(value: &VmValue) -> String {
        value_structural_hash_key(value)
    }

    /// Range literal without the struct-syntax noise.
    fn range(start: i64, end: i64, inclusive: bool) -> VmRange {
        VmRange {
            start,
            end,
            inclusive,
        }
    }

    // ---- structural hash keys ----

    #[test]
    fn hash_key_cross_type_distinct() {
        // Int(1), String("1") and Bool(true) must each get their own key.
        let int_key = key_of(&int(1));
        let string_key = key_of(&text("1"));
        let bool_key = key_of(&VmValue::Bool(true));
        assert_ne!(int_key, string_key);
        assert_ne!(int_key, bool_key);
        assert_ne!(string_key, bool_key);
    }

    #[test]
    fn hash_key_string_with_separator_chars() {
        // A single string that merely looks like two list items must not be
        // confused with an actual two-item list.
        let single = list_of(vec![text("a,string:b")]);
        let double = list_of(vec![text("a"), text("b")]);
        assert_ne!(
            key_of(&single),
            key_of(&double),
            "length-prefixed strings must prevent separator collisions"
        );
    }

    #[test]
    fn hash_key_dict_key_with_equals() {
        // A '=' inside a dict key must not blur the key/value boundary.
        let with_equals = dict_of(vec![("a=b", int(1))]);
        let plain = dict_of(vec![("a", int(1))]);
        assert_ne!(key_of(&with_equals), key_of(&plain));
    }

    #[test]
    fn hash_key_nested_list_vs_flat() {
        // [[1]] and [1] differ only in nesting depth.
        let nested = list_of(vec![list_of(vec![int(1)])]);
        let flat = list_of(vec![int(1)]);
        assert_ne!(key_of(&nested), key_of(&flat));
    }

    #[test]
    fn hash_key_nil() {
        assert_eq!(key_of(&VmValue::Nil), key_of(&VmValue::Nil));
    }

    #[test]
    fn hash_key_float_zero_vs_neg_zero() {
        // The key is derived from the bit pattern, and 0.0 / -0.0 differ in
        // their sign bit.
        let positive_zero = VmValue::Float(0.0);
        let negative_zero = VmValue::Float(-0.0);
        assert_ne!(key_of(&positive_zero), key_of(&negative_zero));
    }

    #[test]
    fn hash_key_equal_values_match() {
        let build = || list_of(vec![text("hello"), int(42), VmValue::Bool(false)]);
        let first = build();
        let second = build();
        assert_eq!(key_of(&first), key_of(&second));
    }

    #[test]
    fn hash_key_dict_with_comma_key() {
        // A ',' inside a dict key must not look like an entry separator.
        let with_comma = dict_of(vec![("a,b", int(1))]);
        let plain = dict_of(vec![("a", int(1))]);
        assert_ne!(key_of(&with_comma), key_of(&plain));
    }

    // ---- VmRange arithmetic at the i64 boundaries ----
    //
    // These pin the saturating/checked arithmetic behind `VmRange::len`,
    // `VmRange::get` and `VmRange::to_vec`. Before that arithmetic was made
    // saturating, the inclusive `i64::MIN to 0` range panicked in debug
    // builds while evaluating `(end - start) + 1`.

    #[test]
    fn vm_range_len_inclusive_saturates_at_i64_max() {
        // The true width does not fit in an i64, so the length clamps.
        let span = range(i64::MIN, 0, true);
        assert_eq!(span.len(), i64::MAX);
    }

    #[test]
    fn vm_range_len_exclusive_full_range_saturates() {
        let span = range(i64::MIN, i64::MAX, false);
        assert_eq!(span.len(), i64::MAX);
    }

    #[test]
    fn vm_range_len_inclusive_full_range_saturates() {
        let span = range(i64::MIN, i64::MAX, true);
        assert_eq!(span.len(), i64::MAX);
    }

    #[test]
    fn vm_range_get_near_max_does_not_overflow() {
        let span = range(i64::MAX - 2, i64::MAX, true);
        assert_eq!(span.len(), 3);
        assert_eq!(span.get(0), Some(i64::MAX - 2));
        assert_eq!(span.get(2), Some(i64::MAX));
        assert_eq!(span.get(3), None);
    }

    #[test]
    fn vm_range_reversed_is_empty() {
        let span = range(5, 1, true);
        assert!(span.is_empty());
        assert_eq!(span.len(), 0);
        assert_eq!(span.first(), None);
        assert_eq!(span.last(), None);
    }

    #[test]
    fn vm_range_contains_near_bounds() {
        let closed = range(1, 5, true);
        assert!(closed.contains(1));
        assert!(closed.contains(5));
        assert!(!closed.contains(0));
        assert!(!closed.contains(6));

        let half_open = range(1, 5, false);
        assert!(half_open.contains(1));
        assert!(half_open.contains(4));
        assert!(!half_open.contains(5));
    }

    #[test]
    fn vm_range_to_vec_matches_direct_iteration() {
        let span = range(-2, 2, true);
        let materialized = span.to_vec();
        assert_eq!(materialized.len(), 5);

        let ints: Vec<i64> = materialized
            .iter()
            .map(|item| match item {
                VmValue::Int(n) => *n,
                _ => panic!("non-int in range"),
            })
            .collect();
        assert_eq!(ints, vec![-2, -1, 0, 1, 2]);
    }
}
harn_vm/value.rs

harn_vm/
value.rs