harn_vm/
value.rs

1use std::collections::BTreeMap;
2use std::rc::Rc;
3use std::sync::atomic::{AtomicBool, AtomicI64, Ordering};
4use std::sync::Arc;
5use std::{cell::RefCell, path::PathBuf};
6
7use crate::chunk::CompiledFunction;
8use crate::mcp::VmMcpClientHandle;
9
/// An async builtin function for the VM.
///
/// A reference-counted (`Rc`) callable that takes its argument list by value
/// and returns a boxed, pinned future. The future resolves to the builtin's
/// result value or a [`VmError`]. The boxed future carries no `Send` bound.
pub type VmAsyncBuiltinFn = Rc<
    dyn Fn(
        Vec<VmValue>,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<VmValue, VmError>>>>,
>;
16
/// The raw join handle type for spawned tasks.
///
/// A completed task yields either a `(VmValue, String)` pair or a [`VmError`].
/// NOTE(review): the meaning of the `String` half is not visible in this part
/// of the file (presumably captured task output) — confirm at the spawn site.
pub type VmJoinHandle = tokio::task::JoinHandle<Result<(VmValue, String), VmError>>;
19
/// A spawned async task handle with cancellation support.
pub struct VmTaskHandle {
    /// Tokio join handle used to await the task's result.
    pub handle: VmJoinHandle,
    /// Cooperative cancellation token. Set to true to request graceful shutdown.
    pub cancel_token: Arc<AtomicBool>,
}
26
/// A channel handle for the VM (uses tokio mpsc).
///
/// Cloning the handle clones the `Arc`s, so every clone refers to the same
/// underlying channel endpoints and the same `closed` flag.
#[derive(Debug, Clone)]
pub struct VmChannelHandle {
    /// Channel name; shown in the display form `<channel:NAME>`.
    pub name: String,
    /// Shared sending half of the tokio mpsc channel.
    pub sender: Arc<tokio::sync::mpsc::Sender<VmValue>>,
    /// Shared receiving half, guarded by an async mutex.
    pub receiver: Arc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
    /// Shared boolean flag. NOTE(review): presumably set to `true` once the
    /// channel has been closed — confirm against the channel builtins.
    pub closed: Arc<AtomicBool>,
}
35
/// An atomic integer handle for the VM.
///
/// Cloning the handle clones the `Arc`, so all clones observe the same counter.
#[derive(Debug, Clone)]
pub struct VmAtomicHandle {
    /// The shared 64-bit signed atomic value.
    pub value: Arc<AtomicI64>,
}
41
/// A lazy integer range — Python-style. Stores only `(start, end, inclusive)`
/// so the in-memory footprint is O(1) regardless of the range's length.
/// `len()`, indexing (`r[k]`), `.contains(x)`, `.first()`, `.last()` are all
/// O(1); direct iteration walks step-by-step without materializing a list.
///
/// Empty-range convention (Python-consistent):
/// - Inclusive empty when `start > end`.
/// - Exclusive empty when `start >= end`.
///
/// Negative / reversed ranges are NOT supported in v1: `5 to 1` is simply
/// empty. Authors who want reverse iteration should call `.to_list().reverse()`.
#[derive(Debug, Clone, Copy)]
pub struct VmRange {
    /// First value of the range (when the range is non-empty).
    pub start: i64,
    /// Upper bound; part of the range only when `inclusive` is `true`.
    pub end: i64,
    /// Whether `end` itself belongs to the range.
    pub inclusive: bool,
}
59
60impl VmRange {
61    /// Number of elements this range yields.
62    ///
63    /// Uses saturating arithmetic so that pathological ranges near
64    /// `i64::MAX`/`i64::MIN` do not panic on overflow. Because a range's
65    /// element count must fit in `i64` the returned length saturates at
66    /// `i64::MAX` for ranges whose width exceeds that (e.g. `i64::MIN to
67    /// i64::MAX` inclusive). Callers that later narrow to `usize` for
68    /// allocation should still guard against huge lengths — see
69    /// `to_vec` / `get` for the indexable-range invariants.
70    pub fn len(&self) -> i64 {
71        if self.inclusive {
72            if self.start > self.end {
73                0
74            } else {
75                self.end.saturating_sub(self.start).saturating_add(1)
76            }
77        } else if self.start >= self.end {
78            0
79        } else {
80            self.end.saturating_sub(self.start)
81        }
82    }
83
84    pub fn is_empty(&self) -> bool {
85        self.len() == 0
86    }
87
88    /// Element at the given 0-based index, bounds-checked.
89    /// Returns `None` when out of bounds or when `start + idx` would
90    /// overflow (which can only happen when `len()` saturated).
91    pub fn get(&self, idx: i64) -> Option<i64> {
92        if idx < 0 || idx >= self.len() {
93            None
94        } else {
95            self.start.checked_add(idx)
96        }
97    }
98
99    /// First element or `None` when empty.
100    pub fn first(&self) -> Option<i64> {
101        if self.is_empty() {
102            None
103        } else {
104            Some(self.start)
105        }
106    }
107
108    /// Last element or `None` when empty.
109    pub fn last(&self) -> Option<i64> {
110        if self.is_empty() {
111            None
112        } else if self.inclusive {
113            Some(self.end)
114        } else {
115            Some(self.end - 1)
116        }
117    }
118
119    /// Whether `v` falls inside the range (O(1)).
120    pub fn contains(&self, v: i64) -> bool {
121        if self.is_empty() {
122            return false;
123        }
124        if self.inclusive {
125            v >= self.start && v <= self.end
126        } else {
127            v >= self.start && v < self.end
128        }
129    }
130
131    /// Materialize to a `Vec<VmValue>` — the explicit escape hatch.
132    ///
133    /// Uses `checked_add` on the per-element index so a range near
134    /// `i64::MAX` stops at the representable bound instead of panicking.
135    /// Callers should still treat a very long range as unwise to
136    /// materialize (the whole point of `VmRange` is to avoid this).
137    pub fn to_vec(&self) -> Vec<VmValue> {
138        let len = self.len();
139        if len <= 0 {
140            return Vec::new();
141        }
142        let cap = len as usize;
143        let mut out = Vec::with_capacity(cap);
144        for i in 0..len {
145            match self.start.checked_add(i) {
146                Some(v) => out.push(VmValue::Int(v)),
147                None => break,
148            }
149        }
150        out
151    }
152}
153
/// A generator object: lazily produces values via yield.
/// The generator body runs as a spawned task that sends values through a channel.
///
/// Both fields are `Rc`-shared, so clones of a `VmGenerator` observe the same
/// `done` flag and pull from the same receiver.
#[derive(Debug, Clone)]
pub struct VmGenerator {
    /// Whether the generator has finished (returned or exhausted).
    /// When set, the value displays as `<generator (done)>`.
    pub done: Rc<std::cell::Cell<bool>>,
    /// Receiver end of the yield channel (generator sends values here).
    /// Wrapped in a shared async mutex so recv() can be called without holding
    /// a RefCell borrow across await points.
    pub receiver: Rc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
}
165
/// VM runtime value.
///
/// Collection and closure payloads are wrapped in `Rc`, so cloning those
/// variants only bumps a reference count.
#[derive(Debug, Clone)]
pub enum VmValue {
    /// 64-bit signed integer.
    Int(i64),
    /// 64-bit floating-point number.
    Float(f64),
    /// Immutable, reference-counted string.
    String(Rc<str>),
    /// Boolean.
    Bool(bool),
    /// The absent value; displays as `nil`.
    Nil,
    /// Ordered list of values.
    List(Rc<Vec<VmValue>>),
    /// String-keyed dictionary; `BTreeMap` keeps the keys sorted.
    Dict(Rc<BTreeMap<String, VmValue>>),
    /// A compiled closure together with its captured environment.
    Closure(Rc<VmClosure>),
    /// Reference to a registered builtin function, used when a builtin name is
    /// referenced as a value (e.g. `snake_dict.rekey(snake_to_camel)`). The
    /// contained string is the builtin's registered name.
    BuiltinRef(Rc<str>),
    /// A duration in milliseconds (displayed as `h` / `m` / `s` / `ms`).
    Duration(u64),
    /// An enum variant value with optional positional payload fields.
    EnumVariant {
        enum_name: String,
        variant: String,
        fields: Vec<VmValue>,
    },
    /// A struct instance with named fields.
    StructInstance {
        struct_name: String,
        fields: BTreeMap<String, VmValue>,
    },
    /// Identifier of a spawned task; displays as `<task:ID>`.
    TaskHandle(String),
    /// Handle to a tokio mpsc channel.
    Channel(VmChannelHandle),
    /// Handle to a shared atomic integer.
    Atomic(VmAtomicHandle),
    /// Handle to an MCP client.
    McpClient(VmMcpClientHandle),
    /// A set, stored as a vector of its elements.
    Set(Rc<Vec<VmValue>>),
    /// A lazily evaluated generator.
    Generator(VmGenerator),
    /// A lazy integer range.
    Range(VmRange),
    /// Lazy iterator handle. Single-pass, fused. See `crate::vm::iter::VmIter`.
    Iter(Rc<RefCell<crate::vm::iter::VmIter>>),
    /// Two-element pair value. Produced by `pair(a, b)`, yielded by the
    /// Dict iterator source, and (later) by `zip` / `enumerate` combinators.
    /// Accessed via `.first` / `.second`, and destructurable in
    /// `for (a, b) in ...` loops.
    Pair(Rc<(VmValue, VmValue)>),
}
206
/// A compiled closure value.
#[derive(Debug, Clone)]
pub struct VmClosure {
    /// The compiled function. Its `params` list is what the display form
    /// `<fn(a, b)>` prints.
    pub func: CompiledFunction,
    /// The per-closure lexical environment snapshot (captured bindings from
    /// enclosing scopes).
    pub env: VmEnv,
    /// Source directory for this closure's originating module.
    /// When set, `render()` and other source-relative builtins resolve
    /// paths relative to this directory instead of the entry pipeline.
    pub source_dir: Option<PathBuf>,
    /// Module-local named functions that should resolve before builtin fallback.
    /// This lets selectively imported functions keep private sibling helpers
    /// without exporting them into the caller's environment.
    pub module_functions: Option<ModuleFunctionRegistry>,
    /// Shared, mutable module-level env: holds top-level `var` / `let`
    /// bindings declared at the module root (caches, counters, lazily
    /// initialized registries). All closures created from the same
    /// module import point at the same `Rc<RefCell<VmEnv>>`, so a
    /// mutation inside one function is visible to every other function
    /// in that module on subsequent calls. `closure.env` still holds
    /// the per-closure lexical snapshot (captured function args from
    /// enclosing scopes, etc.) and is unchanged by this — `module_state`
    /// is a separate lookup layer consulted after the local env and
    /// before globals. Created in `import_declarations` after the
    /// module's init chunk runs, so the initial values from `var x = ...`
    /// land in it.
    pub module_state: Option<ModuleState>,
}
234
/// Shared, mutable registry mapping a module-local function name to its
/// closure. Stored on [`VmClosure::module_functions`].
pub type ModuleFunctionRegistry = Rc<RefCell<BTreeMap<String, Rc<VmClosure>>>>;
/// Shared, mutable module-level environment. Stored on
/// [`VmClosure::module_state`]; cloning the `Rc` shares the same `VmEnv`.
pub type ModuleState = Rc<RefCell<VmEnv>>;
237
/// VM environment for variable storage.
///
/// Holds a stack of lexical scopes. Lookups walk the stack from the innermost
/// scope outwards; new bindings are always defined in the innermost scope.
#[derive(Debug, Clone)]
pub struct VmEnv {
    /// Scope stack; the last element is the innermost scope.
    pub(crate) scopes: Vec<Scope>,
}
243
/// One lexical scope: a map from variable name to its binding.
#[derive(Debug, Clone)]
pub(crate) struct Scope {
    /// Each entry stores the bound value and whether the binding is mutable
    /// (immutable bindings are rejected by `VmEnv::assign`).
    pub(crate) vars: BTreeMap<String, (VmValue, bool)>, // (value, mutable)
}
248
/// The default environment is identical to [`VmEnv::new`]: a single empty scope.
impl Default for VmEnv {
    fn default() -> Self {
        Self::new()
    }
}
254
255impl VmEnv {
256    pub fn new() -> Self {
257        Self {
258            scopes: vec![Scope {
259                vars: BTreeMap::new(),
260            }],
261        }
262    }
263
264    pub fn push_scope(&mut self) {
265        self.scopes.push(Scope {
266            vars: BTreeMap::new(),
267        });
268    }
269
270    pub fn pop_scope(&mut self) {
271        if self.scopes.len() > 1 {
272            self.scopes.pop();
273        }
274    }
275
276    pub fn scope_depth(&self) -> usize {
277        self.scopes.len()
278    }
279
280    pub fn truncate_scopes(&mut self, target_depth: usize) {
281        let min_depth = target_depth.max(1);
282        while self.scopes.len() > min_depth {
283            self.scopes.pop();
284        }
285    }
286
287    pub fn get(&self, name: &str) -> Option<VmValue> {
288        for scope in self.scopes.iter().rev() {
289            if let Some((val, _)) = scope.vars.get(name) {
290                return Some(val.clone());
291            }
292        }
293        None
294    }
295
296    pub fn define(&mut self, name: &str, value: VmValue, mutable: bool) -> Result<(), VmError> {
297        if let Some(scope) = self.scopes.last_mut() {
298            if let Some((_, existing_mutable)) = scope.vars.get(name) {
299                if !existing_mutable && !mutable {
300                    return Err(VmError::Runtime(format!(
301                        "Cannot redeclare immutable variable '{name}' in the same scope (use 'var' for mutable bindings)"
302                    )));
303                }
304            }
305            scope.vars.insert(name.to_string(), (value, mutable));
306        }
307        Ok(())
308    }
309
310    pub fn all_variables(&self) -> BTreeMap<String, VmValue> {
311        let mut vars = BTreeMap::new();
312        for scope in &self.scopes {
313            for (name, (value, _)) in &scope.vars {
314                vars.insert(name.clone(), value.clone());
315            }
316        }
317        vars
318    }
319
320    pub fn assign(&mut self, name: &str, value: VmValue) -> Result<(), VmError> {
321        for scope in self.scopes.iter_mut().rev() {
322            if let Some((_, mutable)) = scope.vars.get(name) {
323                if !mutable {
324                    return Err(VmError::ImmutableAssignment(name.to_string()));
325                }
326                scope.vars.insert(name.to_string(), (value, true));
327                return Ok(());
328            }
329        }
330        Err(VmError::UndefinedVariable(name.to_string()))
331    }
332}
333
/// Levenshtein edit distance between `a` and `b`, measured in `char`s.
///
/// Keeps a single rolling row of the dynamic-programming table, so memory use
/// is proportional to the length of `b`.
fn levenshtein(a: &str, b: &str) -> usize {
    let target: Vec<char> = b.chars().collect();
    // row[j] = distance between the processed prefix of `a` and target[..j].
    let mut row: Vec<usize> = (0..=target.len()).collect();
    for (consumed, source_ch) in a.chars().enumerate() {
        // `diagonal` carries the previous row's value at column j - 1.
        let mut diagonal = row[0];
        row[0] = consumed + 1;
        for (col, target_ch) in target.iter().enumerate() {
            let above = row[col + 1];
            let substitution = diagonal + usize::from(source_ch != *target_ch);
            row[col + 1] = (above + 1).min(row[col] + 1).min(substitution);
            diagonal = above;
        }
    }
    row[target.len()]
}
352
353/// Find the closest match from a list of candidates using Levenshtein distance.
354/// Returns `Some(suggestion)` if a candidate is within `max_dist` edits.
355pub fn closest_match<'a>(name: &str, candidates: impl Iterator<Item = &'a str>) -> Option<String> {
356    let max_dist = match name.len() {
357        0..=2 => 1,
358        3..=5 => 2,
359        _ => 3,
360    };
361    candidates
362        .filter(|c| *c != name && !c.starts_with("__"))
363        .map(|c| (c, levenshtein(name, c)))
364        .filter(|(_, d)| *d <= max_dist)
365        // Prefer smallest distance, then closest length to original, then alphabetical
366        .min_by(|(a, da), (b, db)| {
367            da.cmp(db)
368                .then_with(|| {
369                    let a_diff = (a.len() as isize - name.len() as isize).unsigned_abs();
370                    let b_diff = (b.len() as isize - name.len() as isize).unsigned_abs();
371                    a_diff.cmp(&b_diff)
372                })
373                .then_with(|| a.cmp(b))
374        })
375        .map(|(c, _)| c.to_string())
376}
377
/// VM runtime errors.
///
/// The `Display` implementation renders each variant as a short
/// human-readable message.
#[derive(Debug, Clone)]
pub enum VmError {
    /// Rendered as "Stack underflow".
    StackUnderflow,
    /// Rendered as "Stack overflow: too many nested calls".
    StackOverflow,
    /// A variable name that is not bound; carries the name.
    UndefinedVariable(String),
    /// A builtin name that is not registered; carries the name.
    UndefinedBuiltin(String),
    /// Assignment to an immutable binding; carries the binding's name.
    ImmutableAssignment(String),
    /// Type mismatch; carries the message.
    TypeError(String),
    /// General runtime failure; carries the message.
    Runtime(String),
    /// Rendered as "Division by zero".
    DivisionByZero,
    /// A value thrown by the running program.
    Thrown(VmValue),
    /// Thrown with error category for structured error handling.
    CategorizedError {
        message: String,
        category: ErrorCategory,
    },
    /// Carries a function's return value; rendered as "Return from function".
    /// NOTE(review): presumably a control-flow signal rather than a
    /// user-facing error — confirm against the interpreter loop.
    Return(VmValue),
    /// An opcode byte that is not a valid instruction.
    InvalidInstruction(u8),
}
397
/// Error categories for structured error handling in agent orchestration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ErrorCategory {
    /// Network/connection timeout
    Timeout,
    /// Authentication/authorization failure
    Auth,
    /// Rate limit exceeded
    RateLimit,
    /// Tool execution failure
    ToolError,
    /// Tool was rejected by the host (not permitted / not in allowlist)
    ToolRejected,
    /// Operation was cancelled
    Cancelled,
    /// Resource not found
    NotFound,
    /// Circuit breaker is open
    CircuitOpen,
    /// Generic/unclassified error
    Generic,
}

impl ErrorCategory {
    /// The snake_case identifier for this category (e.g. `"rate_limit"`).
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Timeout => "timeout",
            Self::Auth => "auth",
            Self::RateLimit => "rate_limit",
            Self::ToolError => "tool_error",
            Self::ToolRejected => "tool_rejected",
            Self::Cancelled => "cancelled",
            Self::NotFound => "not_found",
            Self::CircuitOpen => "circuit_open",
            Self::Generic => "generic",
        }
    }

    /// Inverse of [`ErrorCategory::as_str`]. The comparison is exact and
    /// case-sensitive; any unrecognized identifier maps to `Generic`.
    pub fn parse(s: &str) -> Self {
        // Every category except the `Generic` fallback.
        const NAMED: [ErrorCategory; 8] = [
            ErrorCategory::Timeout,
            ErrorCategory::Auth,
            ErrorCategory::RateLimit,
            ErrorCategory::ToolError,
            ErrorCategory::ToolRejected,
            ErrorCategory::Cancelled,
            ErrorCategory::NotFound,
            ErrorCategory::CircuitOpen,
        ];
        NAMED
            .iter()
            .find(|category| category.as_str() == s)
            .cloned()
            .unwrap_or(ErrorCategory::Generic)
    }
}
450
451/// Create a categorized error conveniently.
452pub fn categorized_error(message: impl Into<String>, category: ErrorCategory) -> VmError {
453    VmError::CategorizedError {
454        message: message.into(),
455        category,
456    }
457}
458
459/// Extract error category from a VmError.
460///
461/// Classification priority:
462/// 1. Explicit CategorizedError variant (set by throw_error or internal code)
463/// 2. Thrown dict with a "category" field (user-created structured errors)
464/// 3. HTTP status code extraction (standard, unambiguous)
465/// 4. Deadline exceeded (VM-internal)
466/// 5. Fallback to Generic
467pub fn error_to_category(err: &VmError) -> ErrorCategory {
468    match err {
469        VmError::CategorizedError { category, .. } => category.clone(),
470        VmError::Thrown(VmValue::Dict(d)) => d
471            .get("category")
472            .map(|v| ErrorCategory::parse(&v.display()))
473            .unwrap_or(ErrorCategory::Generic),
474        VmError::Thrown(VmValue::String(s)) => classify_error_message(s),
475        VmError::Runtime(msg) => classify_error_message(msg),
476        _ => ErrorCategory::Generic,
477    }
478}
479
480/// Classify an error message using HTTP status codes and well-known patterns.
481/// Prefers unambiguous signals (status codes) over substring heuristics.
482fn classify_error_message(msg: &str) -> ErrorCategory {
483    // 1. HTTP status codes — most reliable signal
484    if let Some(cat) = classify_by_http_status(msg) {
485        return cat;
486    }
487    // 2. Well-known error identifiers from major APIs
488    //    (Anthropic, OpenAI, and standard HTTP patterns)
489    if msg.contains("Deadline exceeded") || msg.contains("context deadline exceeded") {
490        return ErrorCategory::Timeout;
491    }
492    if msg.contains("overloaded_error") || msg.contains("api_error") {
493        // Anthropic-specific error types
494        return ErrorCategory::RateLimit;
495    }
496    if msg.contains("insufficient_quota") || msg.contains("billing_hard_limit_reached") {
497        // OpenAI-specific error types
498        return ErrorCategory::RateLimit;
499    }
500    if msg.contains("invalid_api_key") || msg.contains("authentication_error") {
501        return ErrorCategory::Auth;
502    }
503    if msg.contains("not_found_error") || msg.contains("model_not_found") {
504        return ErrorCategory::NotFound;
505    }
506    if msg.contains("circuit_open") {
507        return ErrorCategory::CircuitOpen;
508    }
509    ErrorCategory::Generic
510}
511
512/// Classify errors by HTTP status code if one appears in the message.
513/// This is the most reliable classification method since status codes
514/// are standardized (RFC 9110) and unambiguous.
515fn classify_by_http_status(msg: &str) -> Option<ErrorCategory> {
516    // Extract 3-digit HTTP status codes from common patterns:
517    // "HTTP 429", "status 429", "429 Too Many", "error: 401"
518    for code in extract_http_status_codes(msg) {
519        return Some(match code {
520            401 | 403 => ErrorCategory::Auth,
521            404 | 410 => ErrorCategory::NotFound,
522            408 | 504 | 522 | 524 => ErrorCategory::Timeout,
523            429 | 503 => ErrorCategory::RateLimit,
524            _ => continue,
525        });
526    }
527    None
528}
529
/// Collect plausible HTTP error status codes from an error message.
///
/// A candidate is a run of exactly three ASCII digits that is not part of a
/// longer digit sequence. Only values in `400..=599` are kept; they are
/// returned in order of appearance, duplicates included.
fn extract_http_status_codes(msg: &str) -> Vec<u16> {
    let bytes = msg.as_bytes();
    let digit_at = |pos: usize| bytes.get(pos).map_or(false, u8::is_ascii_digit);

    bytes
        .windows(3)
        .enumerate()
        .filter_map(|(start, run)| {
            if !run.iter().all(u8::is_ascii_digit) {
                return None;
            }
            // Reject runs embedded in a longer number.
            let digit_before = start > 0 && digit_at(start - 1);
            let digit_after = digit_at(start + 3);
            if digit_before || digit_after {
                return None;
            }
            let code = run
                .iter()
                .fold(0u16, |acc, digit| acc * 10 + u16::from(digit - b'0'));
            if (400..=599).contains(&code) {
                Some(code)
            } else {
                None
            }
        })
        .collect()
}
554
555impl std::fmt::Display for VmError {
556    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
557        match self {
558            VmError::StackUnderflow => write!(f, "Stack underflow"),
559            VmError::StackOverflow => write!(f, "Stack overflow: too many nested calls"),
560            VmError::UndefinedVariable(n) => write!(f, "Undefined variable: {n}"),
561            VmError::UndefinedBuiltin(n) => write!(f, "Undefined builtin: {n}"),
562            VmError::ImmutableAssignment(n) => {
563                write!(f, "Cannot assign to immutable binding: {n}")
564            }
565            VmError::TypeError(msg) => write!(f, "Type error: {msg}"),
566            VmError::Runtime(msg) => write!(f, "Runtime error: {msg}"),
567            VmError::DivisionByZero => write!(f, "Division by zero"),
568            VmError::Thrown(v) => write!(f, "Thrown: {}", v.display()),
569            VmError::CategorizedError { message, category } => {
570                write!(f, "Error [{}]: {}", category.as_str(), message)
571            }
572            VmError::Return(_) => write!(f, "Return from function"),
573            VmError::InvalidInstruction(op) => write!(f, "Invalid instruction: 0x{op:02x}"),
574        }
575    }
576}
577
/// Marks `VmError` as a standard error type; all methods use the trait's
/// defaults, relying on the `Debug` and `Display` implementations.
impl std::error::Error for VmError {}
579
580impl VmValue {
581    pub fn is_truthy(&self) -> bool {
582        match self {
583            VmValue::Bool(b) => *b,
584            VmValue::Nil => false,
585            VmValue::Int(n) => *n != 0,
586            VmValue::Float(n) => *n != 0.0,
587            VmValue::String(s) => !s.is_empty(),
588            VmValue::List(l) => !l.is_empty(),
589            VmValue::Dict(d) => !d.is_empty(),
590            VmValue::Closure(_) => true,
591            VmValue::BuiltinRef(_) => true,
592            VmValue::Duration(ms) => *ms > 0,
593            VmValue::EnumVariant { .. } => true,
594            VmValue::StructInstance { .. } => true,
595            VmValue::TaskHandle(_) => true,
596            VmValue::Channel(_) => true,
597            VmValue::Atomic(_) => true,
598            VmValue::McpClient(_) => true,
599            VmValue::Set(s) => !s.is_empty(),
600            VmValue::Generator(_) => true,
601            // Match Python semantics: range objects are always truthy,
602            // even the empty range (analogous to generators / iterators).
603            VmValue::Range(_) => true,
604            VmValue::Iter(_) => true,
605            VmValue::Pair(_) => true,
606        }
607    }
608
609    pub fn type_name(&self) -> &'static str {
610        match self {
611            VmValue::String(_) => "string",
612            VmValue::Int(_) => "int",
613            VmValue::Float(_) => "float",
614            VmValue::Bool(_) => "bool",
615            VmValue::Nil => "nil",
616            VmValue::List(_) => "list",
617            VmValue::Dict(_) => "dict",
618            VmValue::Closure(_) => "closure",
619            VmValue::BuiltinRef(_) => "builtin",
620            VmValue::Duration(_) => "duration",
621            VmValue::EnumVariant { .. } => "enum",
622            VmValue::StructInstance { .. } => "struct",
623            VmValue::TaskHandle(_) => "task_handle",
624            VmValue::Channel(_) => "channel",
625            VmValue::Atomic(_) => "atomic",
626            VmValue::McpClient(_) => "mcp_client",
627            VmValue::Set(_) => "set",
628            VmValue::Generator(_) => "generator",
629            VmValue::Range(_) => "range",
630            VmValue::Iter(_) => "iter",
631            VmValue::Pair(_) => "pair",
632        }
633    }
634
635    pub fn display(&self) -> String {
636        let mut out = String::new();
637        self.write_display(&mut out);
638        out
639    }
640
641    /// Writes the display representation directly into `out`,
642    /// avoiding intermediate Vec<String> allocations for collections.
643    pub fn write_display(&self, out: &mut String) {
644        use std::fmt::Write;
645        match self {
646            VmValue::Int(n) => {
647                let _ = write!(out, "{n}");
648            }
649            VmValue::Float(n) => {
650                if *n == (*n as i64) as f64 && n.abs() < 1e15 {
651                    let _ = write!(out, "{n:.1}");
652                } else {
653                    let _ = write!(out, "{n}");
654                }
655            }
656            VmValue::String(s) => out.push_str(s),
657            VmValue::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
658            VmValue::Nil => out.push_str("nil"),
659            VmValue::List(items) => {
660                out.push('[');
661                for (i, item) in items.iter().enumerate() {
662                    if i > 0 {
663                        out.push_str(", ");
664                    }
665                    item.write_display(out);
666                }
667                out.push(']');
668            }
669            VmValue::Dict(map) => {
670                out.push('{');
671                for (i, (k, v)) in map.iter().enumerate() {
672                    if i > 0 {
673                        out.push_str(", ");
674                    }
675                    out.push_str(k);
676                    out.push_str(": ");
677                    v.write_display(out);
678                }
679                out.push('}');
680            }
681            VmValue::Closure(c) => {
682                let _ = write!(out, "<fn({})>", c.func.params.join(", "));
683            }
684            VmValue::BuiltinRef(name) => {
685                let _ = write!(out, "<builtin {name}>");
686            }
687            VmValue::Duration(ms) => {
688                if *ms >= 3_600_000 && ms % 3_600_000 == 0 {
689                    let _ = write!(out, "{}h", ms / 3_600_000);
690                } else if *ms >= 60_000 && ms % 60_000 == 0 {
691                    let _ = write!(out, "{}m", ms / 60_000);
692                } else if *ms >= 1000 && ms % 1000 == 0 {
693                    let _ = write!(out, "{}s", ms / 1000);
694                } else {
695                    let _ = write!(out, "{}ms", ms);
696                }
697            }
698            VmValue::EnumVariant {
699                enum_name,
700                variant,
701                fields,
702            } => {
703                if fields.is_empty() {
704                    let _ = write!(out, "{enum_name}.{variant}");
705                } else {
706                    let _ = write!(out, "{enum_name}.{variant}(");
707                    for (i, v) in fields.iter().enumerate() {
708                        if i > 0 {
709                            out.push_str(", ");
710                        }
711                        v.write_display(out);
712                    }
713                    out.push(')');
714                }
715            }
716            VmValue::StructInstance {
717                struct_name,
718                fields,
719            } => {
720                let _ = write!(out, "{struct_name} {{");
721                for (i, (k, v)) in fields.iter().enumerate() {
722                    if i > 0 {
723                        out.push_str(", ");
724                    }
725                    out.push_str(k);
726                    out.push_str(": ");
727                    v.write_display(out);
728                }
729                out.push('}');
730            }
731            VmValue::TaskHandle(id) => {
732                let _ = write!(out, "<task:{id}>");
733            }
734            VmValue::Channel(ch) => {
735                let _ = write!(out, "<channel:{}>", ch.name);
736            }
737            VmValue::Atomic(a) => {
738                let _ = write!(out, "<atomic:{}>", a.value.load(Ordering::SeqCst));
739            }
740            VmValue::McpClient(c) => {
741                let _ = write!(out, "<mcp_client:{}>", c.name);
742            }
743            VmValue::Set(items) => {
744                out.push_str("set(");
745                for (i, item) in items.iter().enumerate() {
746                    if i > 0 {
747                        out.push_str(", ");
748                    }
749                    item.write_display(out);
750                }
751                out.push(')');
752            }
753            VmValue::Generator(g) => {
754                if g.done.get() {
755                    out.push_str("<generator (done)>");
756                } else {
757                    out.push_str("<generator>");
758                }
759            }
760            // Print form mirrors source syntax: `1 to 5` / `0 to 3 exclusive`.
761            // `.to_list()` is the explicit path to materialize for display.
762            VmValue::Range(r) => {
763                let _ = write!(out, "{} to {}", r.start, r.end);
764                if !r.inclusive {
765                    out.push_str(" exclusive");
766                }
767            }
768            VmValue::Iter(h) => {
769                if matches!(&*h.borrow(), crate::vm::iter::VmIter::Exhausted) {
770                    out.push_str("<iter (exhausted)>");
771                } else {
772                    out.push_str("<iter>");
773                }
774            }
775            VmValue::Pair(p) => {
776                out.push('(');
777                p.0.write_display(out);
778                out.push_str(", ");
779                p.1.write_display(out);
780                out.push(')');
781            }
782        }
783    }
784
785    /// Get the value as a BTreeMap reference, if it's a Dict.
786    pub fn as_dict(&self) -> Option<&BTreeMap<String, VmValue>> {
787        if let VmValue::Dict(d) = self {
788            Some(d)
789        } else {
790            None
791        }
792    }
793
794    pub fn as_int(&self) -> Option<i64> {
795        if let VmValue::Int(n) = self {
796            Some(*n)
797        } else {
798            None
799        }
800    }
801}
802
/// Sync builtin function for the VM.
///
/// A reference-counted callable that receives the call arguments as a slice
/// plus a mutable `String`, and returns the result value or a [`VmError`].
/// NOTE(review): the `&mut String` is presumably the VM's output buffer —
/// confirm at the builtin call site.
pub type VmBuiltinFn = Rc<dyn Fn(&[VmValue], &mut String) -> Result<VmValue, VmError>>;
805
806/// Reference / identity equality. For heap-allocated refcounted values
807/// (List/Dict/Set/Closure) returns true only when both operands share the
808/// same underlying `Rc` allocation. For primitive scalars, falls back to
809/// structural equality (since primitives have no distinct identity).
810pub fn values_identical(a: &VmValue, b: &VmValue) -> bool {
811    match (a, b) {
812        (VmValue::List(x), VmValue::List(y)) => Rc::ptr_eq(x, y),
813        (VmValue::Dict(x), VmValue::Dict(y)) => Rc::ptr_eq(x, y),
814        (VmValue::Set(x), VmValue::Set(y)) => Rc::ptr_eq(x, y),
815        (VmValue::Closure(x), VmValue::Closure(y)) => Rc::ptr_eq(x, y),
816        (VmValue::String(x), VmValue::String(y)) => Rc::ptr_eq(x, y) || x == y,
817        (VmValue::BuiltinRef(x), VmValue::BuiltinRef(y)) => x == y,
818        (VmValue::Pair(x), VmValue::Pair(y)) => Rc::ptr_eq(x, y),
819        // Primitives: identity collapses to structural equality.
820        _ => values_equal(a, b),
821    }
822}
823
824/// Stable identity key for a value. Different allocations produce different
825/// keys; two values with the same heap identity produce the same key. For
826/// primitives the key is derived from the displayed value plus type name so
827/// logically-equal primitives always compare equal.
828pub fn value_identity_key(v: &VmValue) -> String {
829    match v {
830        VmValue::List(x) => format!("list@{:p}", Rc::as_ptr(x)),
831        VmValue::Dict(x) => format!("dict@{:p}", Rc::as_ptr(x)),
832        VmValue::Set(x) => format!("set@{:p}", Rc::as_ptr(x)),
833        VmValue::Closure(x) => format!("closure@{:p}", Rc::as_ptr(x)),
834        VmValue::String(x) => format!("string@{:p}", x.as_ptr()),
835        VmValue::BuiltinRef(name) => format!("builtin@{name}"),
836        other => format!("{}@{}", other.type_name(), other.display()),
837    }
838}
839
840/// Canonical string form used as the keying material for `hash_value`.
841/// Different types never collide (the type name is prepended) and collection
842/// order is preserved so structurally-equal values always produce the same
843/// key. Not intended for cross-process stability; depends on the in-process
844/// iteration order for collections (Dict uses BTreeMap so keys are sorted).
845pub fn value_structural_hash_key(v: &VmValue) -> String {
846    let mut out = String::new();
847    write_structural_hash_key(v, &mut out);
848    out
849}
850
851/// Writes the structural hash key for a value directly into `out`,
852/// avoiding intermediate allocations. Uses length-prefixed encoding
853/// for strings and dict keys to prevent separator collisions.
854fn write_structural_hash_key(v: &VmValue, out: &mut String) {
855    match v {
856        VmValue::Nil => out.push('N'),
857        VmValue::Bool(b) => {
858            out.push(if *b { 'T' } else { 'F' });
859        }
860        VmValue::Int(n) => {
861            out.push('i');
862            out.push_str(&n.to_string());
863            out.push(';');
864        }
865        VmValue::Float(n) => {
866            out.push('f');
867            out.push_str(&n.to_bits().to_string());
868            out.push(';');
869        }
870        VmValue::String(s) => {
871            // Length-prefixed: s<len>:<content> — no ambiguity from content
872            out.push('s');
873            out.push_str(&s.len().to_string());
874            out.push(':');
875            out.push_str(s);
876        }
877        VmValue::Duration(ms) => {
878            out.push('d');
879            out.push_str(&ms.to_string());
880            out.push(';');
881        }
882        VmValue::List(items) => {
883            out.push('L');
884            for item in items.iter() {
885                write_structural_hash_key(item, out);
886                out.push(',');
887            }
888            out.push(']');
889        }
890        VmValue::Dict(map) => {
891            out.push('D');
892            for (k, v) in map.iter() {
893                // Length-prefixed key
894                out.push_str(&k.len().to_string());
895                out.push(':');
896                out.push_str(k);
897                out.push('=');
898                write_structural_hash_key(v, out);
899                out.push(',');
900            }
901            out.push('}');
902        }
903        VmValue::Set(items) => {
904            // Sets need sorted keys for order-independence
905            let mut keys: Vec<String> = items.iter().map(value_structural_hash_key).collect();
906            keys.sort();
907            out.push('S');
908            for k in &keys {
909                out.push_str(k);
910                out.push(',');
911            }
912            out.push('}');
913        }
914        other => {
915            let tn = other.type_name();
916            out.push('o');
917            out.push_str(&tn.len().to_string());
918            out.push(':');
919            out.push_str(tn);
920            let d = other.display();
921            out.push_str(&d.len().to_string());
922            out.push(':');
923            out.push_str(&d);
924        }
925    }
926}
927
928pub fn values_equal(a: &VmValue, b: &VmValue) -> bool {
929    match (a, b) {
930        (VmValue::Int(x), VmValue::Int(y)) => x == y,
931        (VmValue::Float(x), VmValue::Float(y)) => x == y,
932        (VmValue::String(x), VmValue::String(y)) => x == y,
933        (VmValue::Bool(x), VmValue::Bool(y)) => x == y,
934        (VmValue::Nil, VmValue::Nil) => true,
935        (VmValue::Int(x), VmValue::Float(y)) => (*x as f64) == *y,
936        (VmValue::Float(x), VmValue::Int(y)) => *x == (*y as f64),
937        (VmValue::TaskHandle(a), VmValue::TaskHandle(b)) => a == b,
938        (VmValue::Channel(_), VmValue::Channel(_)) => false, // channels are never equal
939        (VmValue::Atomic(a), VmValue::Atomic(b)) => {
940            a.value.load(Ordering::SeqCst) == b.value.load(Ordering::SeqCst)
941        }
942        (VmValue::List(a), VmValue::List(b)) => {
943            a.len() == b.len() && a.iter().zip(b.iter()).all(|(x, y)| values_equal(x, y))
944        }
945        (VmValue::Dict(a), VmValue::Dict(b)) => {
946            a.len() == b.len()
947                && a.iter()
948                    .zip(b.iter())
949                    .all(|((k1, v1), (k2, v2))| k1 == k2 && values_equal(v1, v2))
950        }
951        (
952            VmValue::EnumVariant {
953                enum_name: a_e,
954                variant: a_v,
955                fields: a_f,
956            },
957            VmValue::EnumVariant {
958                enum_name: b_e,
959                variant: b_v,
960                fields: b_f,
961            },
962        ) => {
963            a_e == b_e
964                && a_v == b_v
965                && a_f.len() == b_f.len()
966                && a_f.iter().zip(b_f.iter()).all(|(x, y)| values_equal(x, y))
967        }
968        (
969            VmValue::StructInstance {
970                struct_name: a_s,
971                fields: a_f,
972            },
973            VmValue::StructInstance {
974                struct_name: b_s,
975                fields: b_f,
976            },
977        ) => {
978            a_s == b_s
979                && a_f.len() == b_f.len()
980                && a_f
981                    .iter()
982                    .zip(b_f.iter())
983                    .all(|((k1, v1), (k2, v2))| k1 == k2 && values_equal(v1, v2))
984        }
985        (VmValue::Set(a), VmValue::Set(b)) => {
986            a.len() == b.len() && a.iter().all(|x| b.iter().any(|y| values_equal(x, y)))
987        }
988        (VmValue::Generator(_), VmValue::Generator(_)) => false, // generators are never equal
989        (VmValue::Range(a), VmValue::Range(b)) => {
990            a.start == b.start && a.end == b.end && a.inclusive == b.inclusive
991        }
992        (VmValue::Iter(a), VmValue::Iter(b)) => Rc::ptr_eq(a, b),
993        (VmValue::Pair(a), VmValue::Pair(b)) => {
994            values_equal(&a.0, &b.0) && values_equal(&a.1, &b.1)
995        }
996        _ => false,
997    }
998}
999
1000pub fn compare_values(a: &VmValue, b: &VmValue) -> i32 {
1001    match (a, b) {
1002        (VmValue::Int(x), VmValue::Int(y)) => x.cmp(y) as i32,
1003        (VmValue::Float(x), VmValue::Float(y)) => {
1004            if x < y {
1005                -1
1006            } else if x > y {
1007                1
1008            } else {
1009                0
1010            }
1011        }
1012        (VmValue::Int(x), VmValue::Float(y)) => {
1013            let x = *x as f64;
1014            if x < *y {
1015                -1
1016            } else if x > *y {
1017                1
1018            } else {
1019                0
1020            }
1021        }
1022        (VmValue::Float(x), VmValue::Int(y)) => {
1023            let y = *y as f64;
1024            if *x < y {
1025                -1
1026            } else if *x > y {
1027                1
1028            } else {
1029                0
1030            }
1031        }
1032        (VmValue::String(x), VmValue::String(y)) => x.cmp(y) as i32,
1033        (VmValue::Pair(x), VmValue::Pair(y)) => {
1034            let c = compare_values(&x.0, &y.0);
1035            if c != 0 {
1036                c
1037            } else {
1038                compare_values(&x.1, &y.1)
1039            }
1040        }
1041        _ => 0,
1042    }
1043}
1044
#[cfg(test)]
mod tests {
    use super::*;

    // --- Fixture helpers ---

    /// Shorthand for the function exercised by the `hash_key_*` tests.
    fn key(value: &VmValue) -> String {
        value_structural_hash_key(value)
    }

    fn text(val: &str) -> VmValue {
        VmValue::String(Rc::from(val))
    }

    fn int(val: i64) -> VmValue {
        VmValue::Int(val)
    }

    fn list_of(items: Vec<VmValue>) -> VmValue {
        VmValue::List(Rc::new(items))
    }

    fn dict_of(pairs: Vec<(&str, VmValue)>) -> VmValue {
        VmValue::Dict(Rc::new(
            pairs
                .into_iter()
                .map(|(name, value)| (name.to_string(), value))
                .collect(),
        ))
    }

    fn range(start: i64, end: i64, inclusive: bool) -> VmRange {
        VmRange {
            start,
            end,
            inclusive,
        }
    }

    // --- Structural hash keys ---

    #[test]
    fn hash_key_cross_type_distinct() {
        // Int(1), String("1") and Bool(true) must be pairwise distinct.
        let int_key = key(&int(1));
        let text_key = key(&text("1"));
        let bool_key = key(&VmValue::Bool(true));
        assert_ne!(int_key, text_key);
        assert_ne!(int_key, bool_key);
        assert_ne!(text_key, bool_key);
    }

    #[test]
    fn hash_key_string_with_separator_chars() {
        // A one-element list whose string contains separators must not look
        // like the two-element list ["a", "b"].
        let single = list_of(vec![text("a,string:b")]);
        let double = list_of(vec![text("a"), text("b")]);
        assert_ne!(
            key(&single),
            key(&double),
            "length-prefixed strings must prevent separator collisions"
        );
    }

    #[test]
    fn hash_key_dict_key_with_equals() {
        // A key containing `=` must not collide with the shorter key "a".
        let with_equals = dict_of(vec![("a=b", int(1))]);
        let plain = dict_of(vec![("a", int(1))]);
        assert_ne!(key(&with_equals), key(&plain));
    }

    #[test]
    fn hash_key_nested_list_vs_flat() {
        // [[1]] versus [1].
        let nested = list_of(vec![list_of(vec![int(1)])]);
        let flat = list_of(vec![int(1)]);
        assert_ne!(key(&nested), key(&flat));
    }

    #[test]
    fn hash_key_nil() {
        assert_eq!(key(&VmValue::Nil), key(&VmValue::Nil));
    }

    #[test]
    fn hash_key_float_zero_vs_neg_zero() {
        // 0.0 and -0.0 differ in their bit representation.
        let positive = VmValue::Float(0.0);
        let negative = VmValue::Float(-0.0);
        assert_ne!(key(&positive), key(&negative));
    }

    #[test]
    fn hash_key_equal_values_match() {
        let build = || list_of(vec![text("hello"), int(42), VmValue::Bool(false)]);
        assert_eq!(key(&build()), key(&build()));
    }

    #[test]
    fn hash_key_dict_with_comma_key() {
        let with_comma = dict_of(vec![("a,b", int(1))]);
        let plain = dict_of(vec![("a", int(1))]);
        assert_ne!(key(&with_comma), key(&plain));
    }

    // --- VmRange arithmetic safety at i64 boundaries ---
    //
    // These guard the saturating/checked arithmetic in `VmRange::len` and
    // `VmRange::get` / `VmRange::to_vec`. Before the saturating rewrite the
    // inclusive `i64::MIN to 0` case panicked in debug builds on
    // `(end - start) + 1`.

    #[test]
    fn vm_range_len_inclusive_saturates_at_i64_max() {
        // The true width overflows i64; the length saturates at i64::MAX.
        assert_eq!(range(i64::MIN, 0, true).len(), i64::MAX);
    }

    #[test]
    fn vm_range_len_exclusive_full_range_saturates() {
        assert_eq!(range(i64::MIN, i64::MAX, false).len(), i64::MAX);
    }

    #[test]
    fn vm_range_len_inclusive_full_range_saturates() {
        assert_eq!(range(i64::MIN, i64::MAX, true).len(), i64::MAX);
    }

    #[test]
    fn vm_range_get_near_max_does_not_overflow() {
        let top = range(i64::MAX - 2, i64::MAX, true);
        assert_eq!(top.len(), 3);
        assert_eq!(top.get(0), Some(i64::MAX - 2));
        assert_eq!(top.get(2), Some(i64::MAX));
        assert_eq!(top.get(3), None);
    }

    #[test]
    fn vm_range_reversed_is_empty() {
        let reversed = range(5, 1, true);
        assert!(reversed.is_empty());
        assert_eq!(reversed.len(), 0);
        assert_eq!(reversed.first(), None);
        assert_eq!(reversed.last(), None);
    }

    #[test]
    fn vm_range_contains_near_bounds() {
        let closed = range(1, 5, true);
        assert!(closed.contains(1));
        assert!(closed.contains(5));
        assert!(!closed.contains(0));
        assert!(!closed.contains(6));

        let half_open = range(1, 5, false);
        assert!(half_open.contains(1));
        assert!(half_open.contains(4));
        assert!(!half_open.contains(5));
    }

    #[test]
    fn vm_range_to_vec_matches_direct_iteration() {
        let materialized = range(-2, 2, true).to_vec();
        assert_eq!(materialized.len(), 5);

        let as_ints: Vec<_> = materialized
            .iter()
            .map(|value| match value {
                VmValue::Int(n) => *n,
                _ => panic!("non-int in range"),
            })
            .collect();
        assert_eq!(as_ints, vec![-2, -1, 0, 1, 2]);
    }
}
harn_vm/value.rs

harn_vm/
value.rs