harn_vm/
value.rs

1use std::collections::BTreeMap;
2use std::rc::Rc;
3use std::sync::atomic::{AtomicBool, AtomicI64, Ordering};
4use std::sync::Arc;
5use std::{cell::RefCell, path::PathBuf};
6
7use crate::chunk::CompiledFunction;
8use crate::mcp::VmMcpClientHandle;
9
/// An async builtin function for the VM.
///
/// A reference-counted closure that receives its arguments as an owned
/// `Vec<VmValue>` and returns a boxed, pinned future resolving to
/// `Result<VmValue, VmError>`. Neither the closure nor the future carries
/// a `Send` bound.
pub type VmAsyncBuiltinFn = Rc<
    dyn Fn(
        Vec<VmValue>,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<VmValue, VmError>>>>,
>;
16
/// The raw join handle type for spawned tasks.
///
/// The task resolves to `Ok((value, text))` on success or `Err(VmError)`.
/// NOTE(review): the `String` half of the tuple is presumably the task's
/// captured output — confirm against the spawn site.
pub type VmJoinHandle = tokio::task::JoinHandle<Result<(VmValue, String), VmError>>;
19
/// A spawned async task handle with cancellation support.
///
/// Pairs the tokio join handle with a shared flag used to request
/// cancellation.
pub struct VmTaskHandle {
    /// Join handle of the spawned task; yields the task's result when awaited.
    pub handle: VmJoinHandle,
    /// Cooperative cancellation token. Set to true to request graceful shutdown.
    pub cancel_token: Arc<AtomicBool>,
}
26
/// A channel handle for the VM (uses tokio mpsc).
///
/// Cloning the handle clones the `Arc`s, so every clone refers to the same
/// underlying sender, receiver and `closed` flag.
#[derive(Debug, Clone)]
pub struct VmChannelHandle {
    /// Channel name; shown in the value's display form as `<channel:NAME>`.
    pub name: String,
    /// Sending half of the tokio mpsc channel.
    pub sender: Arc<tokio::sync::mpsc::Sender<VmValue>>,
    /// Receiving half, guarded by an async mutex.
    pub receiver: Arc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
    /// Shared flag. NOTE(review): presumably set once the channel has been
    /// closed — confirm against the channel builtins.
    pub closed: Arc<AtomicBool>,
}
35
/// An atomic integer handle for the VM.
///
/// Clones share the same underlying `AtomicI64` through the `Arc`.
#[derive(Debug, Clone)]
pub struct VmAtomicHandle {
    /// The shared 64-bit atomic counter.
    pub value: Arc<AtomicI64>,
}
41
/// A lazy integer range — Python-style. Stores only `(start, end, inclusive)`
/// so the in-memory footprint is O(1) regardless of the range's length.
/// `len()`, indexing (`r[k]`), `.contains(x)`, `.first()`, `.last()` are all
/// O(1); direct iteration walks step-by-step without materializing a list.
///
/// Empty-range convention (Python-consistent):
/// - Inclusive empty when `start > end`.
/// - Exclusive empty when `start >= end`.
///
/// Negative / reversed ranges are NOT supported in v1: `5 to 1` is simply
/// empty. Authors who want reverse iteration should call `.to_list().reverse()`.
#[derive(Debug, Clone, Copy)]
pub struct VmRange {
    /// First element of the range (when the range is non-empty).
    pub start: i64,
    /// Upper bound; part of the range only when `inclusive` is true.
    pub end: i64,
    /// Whether `end` itself belongs to the range.
    pub inclusive: bool,
}
59
60impl VmRange {
61    /// Number of elements this range yields.
62    ///
63    /// Uses saturating arithmetic so that pathological ranges near
64    /// `i64::MAX`/`i64::MIN` do not panic on overflow. Because a range's
65    /// element count must fit in `i64` the returned length saturates at
66    /// `i64::MAX` for ranges whose width exceeds that (e.g. `i64::MIN to
67    /// i64::MAX` inclusive). Callers that later narrow to `usize` for
68    /// allocation should still guard against huge lengths — see
69    /// `to_vec` / `get` for the indexable-range invariants.
70    pub fn len(&self) -> i64 {
71        if self.inclusive {
72            if self.start > self.end {
73                0
74            } else {
75                self.end.saturating_sub(self.start).saturating_add(1)
76            }
77        } else if self.start >= self.end {
78            0
79        } else {
80            self.end.saturating_sub(self.start)
81        }
82    }
83
84    pub fn is_empty(&self) -> bool {
85        self.len() == 0
86    }
87
88    /// Element at the given 0-based index, bounds-checked.
89    /// Returns `None` when out of bounds or when `start + idx` would
90    /// overflow (which can only happen when `len()` saturated).
91    pub fn get(&self, idx: i64) -> Option<i64> {
92        if idx < 0 || idx >= self.len() {
93            None
94        } else {
95            self.start.checked_add(idx)
96        }
97    }
98
99    /// First element or `None` when empty.
100    pub fn first(&self) -> Option<i64> {
101        if self.is_empty() {
102            None
103        } else {
104            Some(self.start)
105        }
106    }
107
108    /// Last element or `None` when empty.
109    pub fn last(&self) -> Option<i64> {
110        if self.is_empty() {
111            None
112        } else if self.inclusive {
113            Some(self.end)
114        } else {
115            Some(self.end - 1)
116        }
117    }
118
119    /// Whether `v` falls inside the range (O(1)).
120    pub fn contains(&self, v: i64) -> bool {
121        if self.is_empty() {
122            return false;
123        }
124        if self.inclusive {
125            v >= self.start && v <= self.end
126        } else {
127            v >= self.start && v < self.end
128        }
129    }
130
131    /// Materialize to a `Vec<VmValue>` — the explicit escape hatch.
132    ///
133    /// Uses `checked_add` on the per-element index so a range near
134    /// `i64::MAX` stops at the representable bound instead of panicking.
135    /// Callers should still treat a very long range as unwise to
136    /// materialize (the whole point of `VmRange` is to avoid this).
137    pub fn to_vec(&self) -> Vec<VmValue> {
138        let len = self.len();
139        if len <= 0 {
140            return Vec::new();
141        }
142        let cap = len as usize;
143        let mut out = Vec::with_capacity(cap);
144        for i in 0..len {
145            match self.start.checked_add(i) {
146                Some(v) => out.push(VmValue::Int(v)),
147                None => break,
148            }
149        }
150        out
151    }
152}
153
/// A generator object: lazily produces values via yield.
/// The generator body runs as a spawned task that sends values through a channel.
///
/// Both fields sit behind `Rc`, so clones of a `VmGenerator` share the same
/// `done` flag and the same receiver.
#[derive(Debug, Clone)]
pub struct VmGenerator {
    /// Whether the generator has finished (returned or exhausted).
    pub done: Rc<std::cell::Cell<bool>>,
    /// Receiver end of the yield channel (generator sends values here).
    /// Wrapped in a shared async mutex so recv() can be called without holding
    /// a RefCell borrow across await points.
    pub receiver: Rc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
}
165
/// VM runtime value.
///
/// Collection and callable payloads are held behind `Rc`, so cloning one of
/// those variants bumps a reference count rather than copying the payload.
#[derive(Debug, Clone)]
pub enum VmValue {
    /// 64-bit signed integer.
    Int(i64),
    /// 64-bit floating-point number.
    Float(f64),
    /// Reference-counted string.
    String(Rc<str>),
    /// Boolean.
    Bool(bool),
    /// The absent value; displayed as `nil`.
    Nil,
    /// Reference-counted list of values.
    List(Rc<Vec<VmValue>>),
    /// Reference-counted dictionary with string keys, ordered by key
    /// (`BTreeMap`).
    Dict(Rc<BTreeMap<String, VmValue>>),
    /// A compiled closure together with its captured state.
    Closure(Rc<VmClosure>),
    /// Reference to a registered builtin function, used when a builtin name is
    /// referenced as a value (e.g. `snake_dict.rekey(snake_to_camel)`). The
    /// contained string is the builtin's registered name.
    BuiltinRef(Rc<str>),
    /// Duration in milliseconds (the display form scales it to
    /// `h` / `m` / `s` / `ms`).
    Duration(u64),
    /// An enum variant value: enum name, variant name and positional payload.
    EnumVariant {
        enum_name: String,
        variant: String,
        fields: Vec<VmValue>,
    },
    /// A struct instance: struct name plus its named fields.
    StructInstance {
        struct_name: String,
        fields: BTreeMap<String, VmValue>,
    },
    /// Identifier of a spawned task; displayed as `<task:ID>`.
    TaskHandle(String),
    /// Handle to a VM channel.
    Channel(VmChannelHandle),
    /// Handle to a shared atomic integer.
    Atomic(VmAtomicHandle),
    /// Handle to an MCP client.
    McpClient(VmMcpClientHandle),
    /// A set, stored as a reference-counted `Vec`.
    /// NOTE(review): element uniqueness is presumably maintained by the set
    /// builtins rather than by this type — confirm.
    Set(Rc<Vec<VmValue>>),
    /// A generator object (see `VmGenerator`).
    Generator(VmGenerator),
    /// A lazy integer range (see `VmRange`).
    Range(VmRange),
    /// Lazy iterator handle. Single-pass, fused. See `crate::vm::iter::VmIter`.
    Iter(Rc<RefCell<crate::vm::iter::VmIter>>),
    /// Two-element pair value. Produced by `pair(a, b)`, yielded by the
    /// Dict iterator source, and (later) by `zip` / `enumerate` combinators.
    /// Accessed via `.first` / `.second`, and destructurable in
    /// `for (a, b) in ...` loops.
    Pair(Rc<(VmValue, VmValue)>),
}
206
/// A compiled closure value.
#[derive(Debug, Clone)]
pub struct VmClosure {
    /// The compiled function this closure executes.
    pub func: CompiledFunction,
    /// Per-closure lexical snapshot of the defining environment.
    pub env: VmEnv,
    /// Source directory for this closure's originating module.
    /// When set, `render()` and other source-relative builtins resolve
    /// paths relative to this directory instead of the entry pipeline.
    pub source_dir: Option<PathBuf>,
    /// Module-local named functions that should resolve before builtin fallback.
    /// This lets selectively imported functions keep private sibling helpers
    /// without exporting them into the caller's environment.
    pub module_functions: Option<ModuleFunctionRegistry>,
    /// Shared, mutable module-level env: holds top-level `var` / `let`
    /// bindings declared at the module root (caches, counters, lazily
    /// initialized registries). All closures created from the same
    /// module import point at the same `Rc<RefCell<VmEnv>>`, so a
    /// mutation inside one function is visible to every other function
    /// in that module on subsequent calls. `closure.env` still holds
    /// the per-closure lexical snapshot (captured function args from
    /// enclosing scopes, etc.) and is unchanged by this — `module_state`
    /// is a separate lookup layer consulted after the local env and
    /// before globals. Created in `import_declarations` after the
    /// module's init chunk runs, so the initial values from `var x = ...`
    /// land in it.
    pub module_state: Option<ModuleState>,
}
234
/// Shared, mutable map from function name to closure.
pub type ModuleFunctionRegistry = Rc<RefCell<BTreeMap<String, Rc<VmClosure>>>>;
/// Shared, mutable environment.
pub type ModuleState = Rc<RefCell<VmEnv>>;
237
/// VM environment for variable storage.
///
/// A stack of scopes: lookups walk from the innermost (last) scope outwards,
/// and new bindings are defined in the innermost scope.
#[derive(Debug, Clone)]
pub struct VmEnv {
    /// Scope stack; the last element is the innermost scope.
    pub(crate) scopes: Vec<Scope>,
}
243
/// A single lexical scope: the bindings declared at one nesting level.
#[derive(Debug, Clone)]
pub(crate) struct Scope {
    /// Variable name mapped to its current value and a flag telling whether
    /// the binding may be reassigned.
    pub(crate) vars: BTreeMap<String, (VmValue, bool)>, // (value, mutable)
}
248
249impl Default for VmEnv {
250    fn default() -> Self {
251        Self::new()
252    }
253}
254
255impl VmEnv {
256    pub fn new() -> Self {
257        Self {
258            scopes: vec![Scope {
259                vars: BTreeMap::new(),
260            }],
261        }
262    }
263
264    pub fn push_scope(&mut self) {
265        self.scopes.push(Scope {
266            vars: BTreeMap::new(),
267        });
268    }
269
270    pub fn pop_scope(&mut self) {
271        if self.scopes.len() > 1 {
272            self.scopes.pop();
273        }
274    }
275
276    pub fn scope_depth(&self) -> usize {
277        self.scopes.len()
278    }
279
280    pub fn truncate_scopes(&mut self, target_depth: usize) {
281        let min_depth = target_depth.max(1);
282        while self.scopes.len() > min_depth {
283            self.scopes.pop();
284        }
285    }
286
287    pub fn get(&self, name: &str) -> Option<VmValue> {
288        for scope in self.scopes.iter().rev() {
289            if let Some((val, _)) = scope.vars.get(name) {
290                return Some(val.clone());
291            }
292        }
293        None
294    }
295
296    pub fn define(&mut self, name: &str, value: VmValue, mutable: bool) -> Result<(), VmError> {
297        if let Some(scope) = self.scopes.last_mut() {
298            if let Some((_, existing_mutable)) = scope.vars.get(name) {
299                if !existing_mutable && !mutable {
300                    return Err(VmError::Runtime(format!(
301                        "Cannot redeclare immutable variable '{name}' in the same scope (use 'var' for mutable bindings)"
302                    )));
303                }
304            }
305            scope.vars.insert(name.to_string(), (value, mutable));
306        }
307        Ok(())
308    }
309
310    pub fn all_variables(&self) -> BTreeMap<String, VmValue> {
311        let mut vars = BTreeMap::new();
312        for scope in &self.scopes {
313            for (name, (value, _)) in &scope.vars {
314                vars.insert(name.clone(), value.clone());
315            }
316        }
317        vars
318    }
319
320    pub fn assign(&mut self, name: &str, value: VmValue) -> Result<(), VmError> {
321        for scope in self.scopes.iter_mut().rev() {
322            if let Some((_, mutable)) = scope.vars.get(name) {
323                if !mutable {
324                    return Err(VmError::ImmutableAssignment(name.to_string()));
325                }
326                scope.vars.insert(name.to_string(), (value, true));
327                return Ok(());
328            }
329        }
330        Err(VmError::UndefinedVariable(name.to_string()))
331    }
332}
333
/// Levenshtein edit distance between `a` and `b`, measured in `char`s.
///
/// Keeps only two rows of the dynamic-programming table, so memory use is
/// proportional to the length of `b`.
fn levenshtein(a: &str, b: &str) -> usize {
    let source: Vec<char> = a.chars().collect();
    let target: Vec<char> = b.chars().collect();
    let width = target.len();

    // `above` is the finished previous row; `row` is the one being filled.
    let mut above: Vec<usize> = (0..=width).collect();
    let mut row = vec![0usize; width + 1];

    for (i, &source_char) in source.iter().enumerate() {
        row[0] = i + 1;
        for (j, &target_char) in target.iter().enumerate() {
            let deletion = above[j + 1] + 1;
            let insertion = row[j] + 1;
            let substitution = above[j] + usize::from(source_char != target_char);
            row[j + 1] = deletion.min(insertion).min(substitution);
        }
        std::mem::swap(&mut above, &mut row);
    }
    above[width]
}
351
352/// Find the closest match from a list of candidates using Levenshtein distance.
353/// Returns `Some(suggestion)` if a candidate is within `max_dist` edits.
354pub fn closest_match<'a>(name: &str, candidates: impl Iterator<Item = &'a str>) -> Option<String> {
355    let max_dist = match name.len() {
356        0..=2 => 1,
357        3..=5 => 2,
358        _ => 3,
359    };
360    candidates
361        .filter(|c| *c != name && !c.starts_with("__"))
362        .map(|c| (c, levenshtein(name, c)))
363        .filter(|(_, d)| *d <= max_dist)
364        // Prefer smallest distance, then closest length to original, then alphabetical
365        .min_by(|(a, da), (b, db)| {
366            da.cmp(db)
367                .then_with(|| {
368                    let a_diff = (a.len() as isize - name.len() as isize).unsigned_abs();
369                    let b_diff = (b.len() as isize - name.len() as isize).unsigned_abs();
370                    a_diff.cmp(&b_diff)
371                })
372                .then_with(|| a.cmp(b))
373        })
374        .map(|(c, _)| c.to_string())
375}
376
/// Errors produced by the VM.
#[derive(Debug, Clone)]
pub enum VmError {
    /// Displayed as "Stack underflow".
    StackUnderflow,
    /// Displayed as "Stack overflow: too many nested calls".
    StackOverflow,
    /// A variable name that no scope defines.
    UndefinedVariable(String),
    /// A builtin name that is not defined.
    UndefinedBuiltin(String),
    /// Assignment to an immutable binding; carries the binding's name.
    ImmutableAssignment(String),
    /// Type error with a descriptive message.
    TypeError(String),
    /// Generic runtime error with a descriptive message.
    Runtime(String),
    /// Division by zero.
    DivisionByZero,
    /// A value thrown as an error.
    Thrown(VmValue),
    /// Thrown with error category for structured error handling.
    CategorizedError {
        message: String,
        category: ErrorCategory,
    },
    /// Displayed as "Return from function".
    /// NOTE(review): presumably a control-flow signal carrying the returned
    /// value rather than a real failure — confirm against the interpreter.
    Return(VmValue),
    /// An opcode byte that is not a valid instruction.
    InvalidInstruction(u8),
}
396
/// Error categories for structured error handling in agent orchestration.
///
/// Each category has a stable snake_case string form (see `as_str` /
/// `parse`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ErrorCategory {
    /// Network/connection timeout
    Timeout,
    /// Authentication/authorization failure
    Auth,
    /// Rate limit exceeded (HTTP 429 / quota)
    RateLimit,
    /// Upstream provider is overloaded (HTTP 503 / 529).
    /// Distinct from RateLimit: the client hasn't exceeded a quota — the
    /// provider is shedding load and will recover on its own.
    Overloaded,
    /// Provider-side 5xx error (500, 502) that isn't specifically overload.
    ServerError,
    /// Network-level transient failure (connection reset, DNS hiccup,
    /// partial stream) — retryable but not provider-status-coded.
    TransientNetwork,
    /// LLM output failed schema validation. Retryable via `schema_retries`.
    SchemaValidation,
    /// Tool execution failure
    ToolError,
    /// Tool was rejected by the host (not permitted / not in allowlist)
    ToolRejected,
    /// Operation was cancelled
    Cancelled,
    /// Resource not found
    NotFound,
    /// Circuit breaker is open
    CircuitOpen,
    /// Generic/unclassified error
    Generic,
}
430
431impl ErrorCategory {
432    pub fn as_str(&self) -> &'static str {
433        match self {
434            ErrorCategory::Timeout => "timeout",
435            ErrorCategory::Auth => "auth",
436            ErrorCategory::RateLimit => "rate_limit",
437            ErrorCategory::Overloaded => "overloaded",
438            ErrorCategory::ServerError => "server_error",
439            ErrorCategory::TransientNetwork => "transient_network",
440            ErrorCategory::SchemaValidation => "schema_validation",
441            ErrorCategory::ToolError => "tool_error",
442            ErrorCategory::ToolRejected => "tool_rejected",
443            ErrorCategory::Cancelled => "cancelled",
444            ErrorCategory::NotFound => "not_found",
445            ErrorCategory::CircuitOpen => "circuit_open",
446            ErrorCategory::Generic => "generic",
447        }
448    }
449
450    pub fn parse(s: &str) -> Self {
451        match s {
452            "timeout" => ErrorCategory::Timeout,
453            "auth" => ErrorCategory::Auth,
454            "rate_limit" => ErrorCategory::RateLimit,
455            "overloaded" => ErrorCategory::Overloaded,
456            "server_error" => ErrorCategory::ServerError,
457            "transient_network" => ErrorCategory::TransientNetwork,
458            "schema_validation" => ErrorCategory::SchemaValidation,
459            "tool_error" => ErrorCategory::ToolError,
460            "tool_rejected" => ErrorCategory::ToolRejected,
461            "cancelled" => ErrorCategory::Cancelled,
462            "not_found" => ErrorCategory::NotFound,
463            "circuit_open" => ErrorCategory::CircuitOpen,
464            _ => ErrorCategory::Generic,
465        }
466    }
467
468    /// Whether an error of this category is worth retrying for a transient
469    /// provider-side reason. Agent loops consult this to decide whether to
470    /// back off and retry vs surface the error to the user.
471    pub fn is_transient(&self) -> bool {
472        matches!(
473            self,
474            ErrorCategory::Timeout
475                | ErrorCategory::RateLimit
476                | ErrorCategory::Overloaded
477                | ErrorCategory::ServerError
478                | ErrorCategory::TransientNetwork
479        )
480    }
481}
482
483/// Create a categorized error conveniently.
484pub fn categorized_error(message: impl Into<String>, category: ErrorCategory) -> VmError {
485    VmError::CategorizedError {
486        message: message.into(),
487        category,
488    }
489}
490
491/// Extract error category from a VmError.
492///
493/// Classification priority:
494/// 1. Explicit CategorizedError variant (set by throw_error or internal code)
495/// 2. Thrown dict with a "category" field (user-created structured errors)
496/// 3. HTTP status code extraction (standard, unambiguous)
497/// 4. Deadline exceeded (VM-internal)
498/// 5. Fallback to Generic
499pub fn error_to_category(err: &VmError) -> ErrorCategory {
500    match err {
501        VmError::CategorizedError { category, .. } => category.clone(),
502        VmError::Thrown(VmValue::Dict(d)) => d
503            .get("category")
504            .map(|v| ErrorCategory::parse(&v.display()))
505            .unwrap_or(ErrorCategory::Generic),
506        VmError::Thrown(VmValue::String(s)) => classify_error_message(s),
507        VmError::Runtime(msg) => classify_error_message(msg),
508        _ => ErrorCategory::Generic,
509    }
510}
511
512/// Classify an error message using HTTP status codes and well-known patterns.
513/// Prefers unambiguous signals (status codes) over substring heuristics.
514pub fn classify_error_message(msg: &str) -> ErrorCategory {
515    // 1. HTTP status codes — most reliable signal
516    if let Some(cat) = classify_by_http_status(msg) {
517        return cat;
518    }
519    // 2. Well-known error identifiers from major APIs
520    //    (Anthropic, OpenAI, and standard HTTP patterns)
521    if msg.contains("Deadline exceeded") || msg.contains("context deadline exceeded") {
522        return ErrorCategory::Timeout;
523    }
524    if msg.contains("overloaded_error") {
525        // Anthropic overloaded_error surfaces as HTTP 529.
526        return ErrorCategory::Overloaded;
527    }
528    if msg.contains("api_error") {
529        // Anthropic catch-all server-side error.
530        return ErrorCategory::ServerError;
531    }
532    if msg.contains("insufficient_quota") || msg.contains("billing_hard_limit_reached") {
533        // OpenAI-specific quota error types.
534        return ErrorCategory::RateLimit;
535    }
536    if msg.contains("invalid_api_key") || msg.contains("authentication_error") {
537        return ErrorCategory::Auth;
538    }
539    if msg.contains("not_found_error") || msg.contains("model_not_found") {
540        return ErrorCategory::NotFound;
541    }
542    if msg.contains("circuit_open") {
543        return ErrorCategory::CircuitOpen;
544    }
545    // Network-level transient patterns (pre-HTTP-status, pre-provider-framing).
546    let lower = msg.to_lowercase();
547    if lower.contains("connection reset")
548        || lower.contains("connection refused")
549        || lower.contains("connection closed")
550        || lower.contains("broken pipe")
551        || lower.contains("dns error")
552        || lower.contains("stream error")
553        || lower.contains("unexpected eof")
554    {
555        return ErrorCategory::TransientNetwork;
556    }
557    ErrorCategory::Generic
558}
559
560/// Classify errors by HTTP status code if one appears in the message.
561/// This is the most reliable classification method since status codes
562/// are standardized (RFC 9110) and unambiguous.
563fn classify_by_http_status(msg: &str) -> Option<ErrorCategory> {
564    // Extract 3-digit HTTP status codes from common patterns:
565    // "HTTP 429", "status 429", "429 Too Many", "error: 401"
566    for code in extract_http_status_codes(msg) {
567        return Some(match code {
568            401 | 403 => ErrorCategory::Auth,
569            404 | 410 => ErrorCategory::NotFound,
570            408 | 504 | 522 | 524 => ErrorCategory::Timeout,
571            429 => ErrorCategory::RateLimit,
572            503 | 529 => ErrorCategory::Overloaded,
573            500 | 502 => ErrorCategory::ServerError,
574            _ => continue,
575        });
576    }
577    None
578}
579
/// Collect plausible HTTP error status codes from an error message.
///
/// A code is a run of exactly three ASCII digits that is not part of a
/// longer number and whose value lies in `400..=599`. Codes are returned
/// in the order they appear in `msg`.
fn extract_http_status_codes(msg: &str) -> Vec<u16> {
    // Splitting on every non-digit leaves the maximal digit runs; a run of
    // length three is a three-digit number with no digit on either side.
    msg.split(|c: char| !c.is_ascii_digit())
        .filter(|run| run.len() == 3)
        .filter_map(|run| run.parse::<u16>().ok())
        .filter(|code| (400..=599).contains(code))
        .collect()
}
604
605impl std::fmt::Display for VmError {
606    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
607        match self {
608            VmError::StackUnderflow => write!(f, "Stack underflow"),
609            VmError::StackOverflow => write!(f, "Stack overflow: too many nested calls"),
610            VmError::UndefinedVariable(n) => write!(f, "Undefined variable: {n}"),
611            VmError::UndefinedBuiltin(n) => write!(f, "Undefined builtin: {n}"),
612            VmError::ImmutableAssignment(n) => {
613                write!(f, "Cannot assign to immutable binding: {n}")
614            }
615            VmError::TypeError(msg) => write!(f, "Type error: {msg}"),
616            VmError::Runtime(msg) => write!(f, "Runtime error: {msg}"),
617            VmError::DivisionByZero => write!(f, "Division by zero"),
618            VmError::Thrown(v) => write!(f, "Thrown: {}", v.display()),
619            VmError::CategorizedError { message, category } => {
620                write!(f, "Error [{}]: {}", category.as_str(), message)
621            }
622            VmError::Return(_) => write!(f, "Return from function"),
623            VmError::InvalidInstruction(op) => write!(f, "Invalid instruction: 0x{op:02x}"),
624        }
625    }
626}
627
// Marker impl: `VmError` already provides `Debug` and `Display`, and the
// default `std::error::Error` methods are used as-is (no `source`).
impl std::error::Error for VmError {}
629
630impl VmValue {
631    pub fn is_truthy(&self) -> bool {
632        match self {
633            VmValue::Bool(b) => *b,
634            VmValue::Nil => false,
635            VmValue::Int(n) => *n != 0,
636            VmValue::Float(n) => *n != 0.0,
637            VmValue::String(s) => !s.is_empty(),
638            VmValue::List(l) => !l.is_empty(),
639            VmValue::Dict(d) => !d.is_empty(),
640            VmValue::Closure(_) => true,
641            VmValue::BuiltinRef(_) => true,
642            VmValue::Duration(ms) => *ms > 0,
643            VmValue::EnumVariant { .. } => true,
644            VmValue::StructInstance { .. } => true,
645            VmValue::TaskHandle(_) => true,
646            VmValue::Channel(_) => true,
647            VmValue::Atomic(_) => true,
648            VmValue::McpClient(_) => true,
649            VmValue::Set(s) => !s.is_empty(),
650            VmValue::Generator(_) => true,
651            // Match Python semantics: range objects are always truthy,
652            // even the empty range (analogous to generators / iterators).
653            VmValue::Range(_) => true,
654            VmValue::Iter(_) => true,
655            VmValue::Pair(_) => true,
656        }
657    }
658
659    pub fn type_name(&self) -> &'static str {
660        match self {
661            VmValue::String(_) => "string",
662            VmValue::Int(_) => "int",
663            VmValue::Float(_) => "float",
664            VmValue::Bool(_) => "bool",
665            VmValue::Nil => "nil",
666            VmValue::List(_) => "list",
667            VmValue::Dict(_) => "dict",
668            VmValue::Closure(_) => "closure",
669            VmValue::BuiltinRef(_) => "builtin",
670            VmValue::Duration(_) => "duration",
671            VmValue::EnumVariant { .. } => "enum",
672            VmValue::StructInstance { .. } => "struct",
673            VmValue::TaskHandle(_) => "task_handle",
674            VmValue::Channel(_) => "channel",
675            VmValue::Atomic(_) => "atomic",
676            VmValue::McpClient(_) => "mcp_client",
677            VmValue::Set(_) => "set",
678            VmValue::Generator(_) => "generator",
679            VmValue::Range(_) => "range",
680            VmValue::Iter(_) => "iter",
681            VmValue::Pair(_) => "pair",
682        }
683    }
684
685    pub fn display(&self) -> String {
686        let mut out = String::new();
687        self.write_display(&mut out);
688        out
689    }
690
691    /// Writes the display representation directly into `out`,
692    /// avoiding intermediate Vec<String> allocations for collections.
693    pub fn write_display(&self, out: &mut String) {
694        use std::fmt::Write;
695        match self {
696            VmValue::Int(n) => {
697                let _ = write!(out, "{n}");
698            }
699            VmValue::Float(n) => {
700                if *n == (*n as i64) as f64 && n.abs() < 1e15 {
701                    let _ = write!(out, "{n:.1}");
702                } else {
703                    let _ = write!(out, "{n}");
704                }
705            }
706            VmValue::String(s) => out.push_str(s),
707            VmValue::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
708            VmValue::Nil => out.push_str("nil"),
709            VmValue::List(items) => {
710                out.push('[');
711                for (i, item) in items.iter().enumerate() {
712                    if i > 0 {
713                        out.push_str(", ");
714                    }
715                    item.write_display(out);
716                }
717                out.push(']');
718            }
719            VmValue::Dict(map) => {
720                out.push('{');
721                for (i, (k, v)) in map.iter().enumerate() {
722                    if i > 0 {
723                        out.push_str(", ");
724                    }
725                    out.push_str(k);
726                    out.push_str(": ");
727                    v.write_display(out);
728                }
729                out.push('}');
730            }
731            VmValue::Closure(c) => {
732                let _ = write!(out, "<fn({})>", c.func.params.join(", "));
733            }
734            VmValue::BuiltinRef(name) => {
735                let _ = write!(out, "<builtin {name}>");
736            }
737            VmValue::Duration(ms) => {
738                if *ms >= 3_600_000 && ms % 3_600_000 == 0 {
739                    let _ = write!(out, "{}h", ms / 3_600_000);
740                } else if *ms >= 60_000 && ms % 60_000 == 0 {
741                    let _ = write!(out, "{}m", ms / 60_000);
742                } else if *ms >= 1000 && ms % 1000 == 0 {
743                    let _ = write!(out, "{}s", ms / 1000);
744                } else {
745                    let _ = write!(out, "{}ms", ms);
746                }
747            }
748            VmValue::EnumVariant {
749                enum_name,
750                variant,
751                fields,
752            } => {
753                if fields.is_empty() {
754                    let _ = write!(out, "{enum_name}.{variant}");
755                } else {
756                    let _ = write!(out, "{enum_name}.{variant}(");
757                    for (i, v) in fields.iter().enumerate() {
758                        if i > 0 {
759                            out.push_str(", ");
760                        }
761                        v.write_display(out);
762                    }
763                    out.push(')');
764                }
765            }
766            VmValue::StructInstance {
767                struct_name,
768                fields,
769            } => {
770                let _ = write!(out, "{struct_name} {{");
771                for (i, (k, v)) in fields.iter().enumerate() {
772                    if i > 0 {
773                        out.push_str(", ");
774                    }
775                    out.push_str(k);
776                    out.push_str(": ");
777                    v.write_display(out);
778                }
779                out.push('}');
780            }
781            VmValue::TaskHandle(id) => {
782                let _ = write!(out, "<task:{id}>");
783            }
784            VmValue::Channel(ch) => {
785                let _ = write!(out, "<channel:{}>", ch.name);
786            }
787            VmValue::Atomic(a) => {
788                let _ = write!(out, "<atomic:{}>", a.value.load(Ordering::SeqCst));
789            }
790            VmValue::McpClient(c) => {
791                let _ = write!(out, "<mcp_client:{}>", c.name);
792            }
793            VmValue::Set(items) => {
794                out.push_str("set(");
795                for (i, item) in items.iter().enumerate() {
796                    if i > 0 {
797                        out.push_str(", ");
798                    }
799                    item.write_display(out);
800                }
801                out.push(')');
802            }
803            VmValue::Generator(g) => {
804                if g.done.get() {
805                    out.push_str("<generator (done)>");
806                } else {
807                    out.push_str("<generator>");
808                }
809            }
810            // Print form mirrors source syntax: `1 to 5` / `0 to 3 exclusive`.
811            // `.to_list()` is the explicit path to materialize for display.
812            VmValue::Range(r) => {
813                let _ = write!(out, "{} to {}", r.start, r.end);
814                if !r.inclusive {
815                    out.push_str(" exclusive");
816                }
817            }
818            VmValue::Iter(h) => {
819                if matches!(&*h.borrow(), crate::vm::iter::VmIter::Exhausted) {
820                    out.push_str("<iter (exhausted)>");
821                } else {
822                    out.push_str("<iter>");
823                }
824            }
825            VmValue::Pair(p) => {
826                out.push('(');
827                p.0.write_display(out);
828                out.push_str(", ");
829                p.1.write_display(out);
830                out.push(')');
831            }
832        }
833    }
834
835    /// Get the value as a BTreeMap reference, if it's a Dict.
836    pub fn as_dict(&self) -> Option<&BTreeMap<String, VmValue>> {
837        if let VmValue::Dict(d) = self {
838            Some(d)
839        } else {
840            None
841        }
842    }
843
844    pub fn as_int(&self) -> Option<i64> {
845        if let VmValue::Int(n) = self {
846            Some(*n)
847        } else {
848            None
849        }
850    }
851}
852
/// Sync builtin function for the VM.
///
/// A reference-counted (`Rc`) callable that takes a slice of [`VmValue`]s and
/// a mutable `String`, and returns either the resulting [`VmValue`] or a
/// [`VmError`].
///
/// NOTE(review): the slice is presumably the call's arguments and the
/// `&mut String` presumably an output buffer the builtin may append to —
/// confirm against the VM call site.
pub type VmBuiltinFn = Rc<dyn Fn(&[VmValue], &mut String) -> Result<VmValue, VmError>>;
855
856/// Reference / identity equality. For heap-allocated refcounted values
857/// (List/Dict/Set/Closure) returns true only when both operands share the
858/// same underlying `Rc` allocation. For primitive scalars, falls back to
859/// structural equality (since primitives have no distinct identity).
860pub fn values_identical(a: &VmValue, b: &VmValue) -> bool {
861    match (a, b) {
862        (VmValue::List(x), VmValue::List(y)) => Rc::ptr_eq(x, y),
863        (VmValue::Dict(x), VmValue::Dict(y)) => Rc::ptr_eq(x, y),
864        (VmValue::Set(x), VmValue::Set(y)) => Rc::ptr_eq(x, y),
865        (VmValue::Closure(x), VmValue::Closure(y)) => Rc::ptr_eq(x, y),
866        (VmValue::String(x), VmValue::String(y)) => Rc::ptr_eq(x, y) || x == y,
867        (VmValue::BuiltinRef(x), VmValue::BuiltinRef(y)) => x == y,
868        (VmValue::Pair(x), VmValue::Pair(y)) => Rc::ptr_eq(x, y),
869        // Primitives: identity collapses to structural equality.
870        _ => values_equal(a, b),
871    }
872}
873
874/// Stable identity key for a value. Different allocations produce different
875/// keys; two values with the same heap identity produce the same key. For
876/// primitives the key is derived from the displayed value plus type name so
877/// logically-equal primitives always compare equal.
878pub fn value_identity_key(v: &VmValue) -> String {
879    match v {
880        VmValue::List(x) => format!("list@{:p}", Rc::as_ptr(x)),
881        VmValue::Dict(x) => format!("dict@{:p}", Rc::as_ptr(x)),
882        VmValue::Set(x) => format!("set@{:p}", Rc::as_ptr(x)),
883        VmValue::Closure(x) => format!("closure@{:p}", Rc::as_ptr(x)),
884        VmValue::String(x) => format!("string@{:p}", x.as_ptr()),
885        VmValue::BuiltinRef(name) => format!("builtin@{name}"),
886        other => format!("{}@{}", other.type_name(), other.display()),
887    }
888}
889
890/// Canonical string form used as the keying material for `hash_value`.
891/// Different types never collide (the type name is prepended) and collection
892/// order is preserved so structurally-equal values always produce the same
893/// key. Not intended for cross-process stability; depends on the in-process
894/// iteration order for collections (Dict uses BTreeMap so keys are sorted).
895pub fn value_structural_hash_key(v: &VmValue) -> String {
896    let mut out = String::new();
897    write_structural_hash_key(v, &mut out);
898    out
899}
900
901/// Writes the structural hash key for a value directly into `out`,
902/// avoiding intermediate allocations. Uses length-prefixed encoding
903/// for strings and dict keys to prevent separator collisions.
904fn write_structural_hash_key(v: &VmValue, out: &mut String) {
905    match v {
906        VmValue::Nil => out.push('N'),
907        VmValue::Bool(b) => {
908            out.push(if *b { 'T' } else { 'F' });
909        }
910        VmValue::Int(n) => {
911            out.push('i');
912            out.push_str(&n.to_string());
913            out.push(';');
914        }
915        VmValue::Float(n) => {
916            out.push('f');
917            out.push_str(&n.to_bits().to_string());
918            out.push(';');
919        }
920        VmValue::String(s) => {
921            // Length-prefixed: s<len>:<content> — no ambiguity from content
922            out.push('s');
923            out.push_str(&s.len().to_string());
924            out.push(':');
925            out.push_str(s);
926        }
927        VmValue::Duration(ms) => {
928            out.push('d');
929            out.push_str(&ms.to_string());
930            out.push(';');
931        }
932        VmValue::List(items) => {
933            out.push('L');
934            for item in items.iter() {
935                write_structural_hash_key(item, out);
936                out.push(',');
937            }
938            out.push(']');
939        }
940        VmValue::Dict(map) => {
941            out.push('D');
942            for (k, v) in map.iter() {
943                // Length-prefixed key
944                out.push_str(&k.len().to_string());
945                out.push(':');
946                out.push_str(k);
947                out.push('=');
948                write_structural_hash_key(v, out);
949                out.push(',');
950            }
951            out.push('}');
952        }
953        VmValue::Set(items) => {
954            // Sets need sorted keys for order-independence
955            let mut keys: Vec<String> = items.iter().map(value_structural_hash_key).collect();
956            keys.sort();
957            out.push('S');
958            for k in &keys {
959                out.push_str(k);
960                out.push(',');
961            }
962            out.push('}');
963        }
964        other => {
965            let tn = other.type_name();
966            out.push('o');
967            out.push_str(&tn.len().to_string());
968            out.push(':');
969            out.push_str(tn);
970            let d = other.display();
971            out.push_str(&d.len().to_string());
972            out.push(':');
973            out.push_str(&d);
974        }
975    }
976}
977
978pub fn values_equal(a: &VmValue, b: &VmValue) -> bool {
979    match (a, b) {
980        (VmValue::Int(x), VmValue::Int(y)) => x == y,
981        (VmValue::Float(x), VmValue::Float(y)) => x == y,
982        (VmValue::String(x), VmValue::String(y)) => x == y,
983        (VmValue::Bool(x), VmValue::Bool(y)) => x == y,
984        (VmValue::Nil, VmValue::Nil) => true,
985        (VmValue::Int(x), VmValue::Float(y)) => (*x as f64) == *y,
986        (VmValue::Float(x), VmValue::Int(y)) => *x == (*y as f64),
987        (VmValue::TaskHandle(a), VmValue::TaskHandle(b)) => a == b,
988        (VmValue::Channel(_), VmValue::Channel(_)) => false, // channels are never equal
989        (VmValue::Atomic(a), VmValue::Atomic(b)) => {
990            a.value.load(Ordering::SeqCst) == b.value.load(Ordering::SeqCst)
991        }
992        (VmValue::List(a), VmValue::List(b)) => {
993            a.len() == b.len() && a.iter().zip(b.iter()).all(|(x, y)| values_equal(x, y))
994        }
995        (VmValue::Dict(a), VmValue::Dict(b)) => {
996            a.len() == b.len()
997                && a.iter()
998                    .zip(b.iter())
999                    .all(|((k1, v1), (k2, v2))| k1 == k2 && values_equal(v1, v2))
1000        }
1001        (
1002            VmValue::EnumVariant {
1003                enum_name: a_e,
1004                variant: a_v,
1005                fields: a_f,
1006            },
1007            VmValue::EnumVariant {
1008                enum_name: b_e,
1009                variant: b_v,
1010                fields: b_f,
1011            },
1012        ) => {
1013            a_e == b_e
1014                && a_v == b_v
1015                && a_f.len() == b_f.len()
1016                && a_f.iter().zip(b_f.iter()).all(|(x, y)| values_equal(x, y))
1017        }
1018        (
1019            VmValue::StructInstance {
1020                struct_name: a_s,
1021                fields: a_f,
1022            },
1023            VmValue::StructInstance {
1024                struct_name: b_s,
1025                fields: b_f,
1026            },
1027        ) => {
1028            a_s == b_s
1029                && a_f.len() == b_f.len()
1030                && a_f
1031                    .iter()
1032                    .zip(b_f.iter())
1033                    .all(|((k1, v1), (k2, v2))| k1 == k2 && values_equal(v1, v2))
1034        }
1035        (VmValue::Set(a), VmValue::Set(b)) => {
1036            a.len() == b.len() && a.iter().all(|x| b.iter().any(|y| values_equal(x, y)))
1037        }
1038        (VmValue::Generator(_), VmValue::Generator(_)) => false, // generators are never equal
1039        (VmValue::Range(a), VmValue::Range(b)) => {
1040            a.start == b.start && a.end == b.end && a.inclusive == b.inclusive
1041        }
1042        (VmValue::Iter(a), VmValue::Iter(b)) => Rc::ptr_eq(a, b),
1043        (VmValue::Pair(a), VmValue::Pair(b)) => {
1044            values_equal(&a.0, &b.0) && values_equal(&a.1, &b.1)
1045        }
1046        _ => false,
1047    }
1048}
1049
1050pub fn compare_values(a: &VmValue, b: &VmValue) -> i32 {
1051    match (a, b) {
1052        (VmValue::Int(x), VmValue::Int(y)) => x.cmp(y) as i32,
1053        (VmValue::Float(x), VmValue::Float(y)) => {
1054            if x < y {
1055                -1
1056            } else if x > y {
1057                1
1058            } else {
1059                0
1060            }
1061        }
1062        (VmValue::Int(x), VmValue::Float(y)) => {
1063            let x = *x as f64;
1064            if x < *y {
1065                -1
1066            } else if x > *y {
1067                1
1068            } else {
1069                0
1070            }
1071        }
1072        (VmValue::Float(x), VmValue::Int(y)) => {
1073            let y = *y as f64;
1074            if *x < y {
1075                -1
1076            } else if *x > y {
1077                1
1078            } else {
1079                0
1080            }
1081        }
1082        (VmValue::String(x), VmValue::String(y)) => x.cmp(y) as i32,
1083        (VmValue::Pair(x), VmValue::Pair(y)) => {
1084            let c = compare_values(&x.0, &y.0);
1085            if c != 0 {
1086                c
1087            } else {
1088                compare_values(&x.1, &y.1)
1089            }
1090        }
1091        _ => 0,
1092    }
1093}
1094
#[cfg(test)]
mod tests {
    use super::*;

    // --- Compact fixture constructors ---

    fn text(value: &str) -> VmValue {
        VmValue::String(Rc::from(value))
    }

    fn int(value: i64) -> VmValue {
        VmValue::Int(value)
    }

    fn list_of(items: Vec<VmValue>) -> VmValue {
        VmValue::List(Rc::new(items))
    }

    fn dict_of(entries: Vec<(&str, VmValue)>) -> VmValue {
        VmValue::Dict(Rc::new(
            entries
                .into_iter()
                .map(|(name, value)| (name.to_string(), value))
                .collect(),
        ))
    }

    /// Shorthand for the function under test in the hash-key cases.
    fn key(value: &VmValue) -> String {
        value_structural_hash_key(value)
    }

    fn range(start: i64, end: i64, inclusive: bool) -> VmRange {
        VmRange {
            start,
            end,
            inclusive,
        }
    }

    // --- Structural hash key ---

    #[test]
    fn hash_key_cross_type_distinct() {
        // Int(1), String("1") and Bool(true) must be pairwise distinct.
        let int_key = key(&int(1));
        let str_key = key(&text("1"));
        let bool_key = key(&VmValue::Bool(true));
        assert_ne!(int_key, str_key);
        assert_ne!(int_key, bool_key);
        assert_ne!(str_key, bool_key);
    }

    #[test]
    fn hash_key_string_with_separator_chars() {
        // A single element containing separator characters must not look
        // like the two-element list ["a", "b"].
        let single = list_of(vec![text("a,string:b")]);
        let double = list_of(vec![text("a"), text("b")]);
        assert_ne!(
            key(&single),
            key(&double),
            "length-prefixed strings must prevent separator collisions"
        );
    }

    #[test]
    fn hash_key_dict_key_with_equals() {
        // A dict key containing '=' must not be confused with the key "a".
        let with_equals = dict_of(vec![("a=b", int(1))]);
        let plain = dict_of(vec![("a", int(1))]);
        assert_ne!(key(&with_equals), key(&plain));
    }

    #[test]
    fn hash_key_nested_list_vs_flat() {
        // [[1]] vs [1]
        let nested = list_of(vec![list_of(vec![int(1)])]);
        let flat = list_of(vec![int(1)]);
        assert_ne!(key(&nested), key(&flat));
    }

    #[test]
    fn hash_key_nil() {
        assert_eq!(key(&VmValue::Nil), key(&VmValue::Nil));
    }

    #[test]
    fn hash_key_float_zero_vs_neg_zero() {
        // 0.0 and -0.0 differ in their bit representation.
        let positive = VmValue::Float(0.0);
        let negative = VmValue::Float(-0.0);
        assert_ne!(key(&positive), key(&negative));
    }

    #[test]
    fn hash_key_equal_values_match() {
        let build = || list_of(vec![text("hello"), int(42), VmValue::Bool(false)]);
        let first = build();
        let second = build();
        assert_eq!(key(&first), key(&second));
    }

    #[test]
    fn hash_key_dict_with_comma_key() {
        let with_comma = dict_of(vec![("a,b", int(1))]);
        let plain = dict_of(vec![("a", int(1))]);
        assert_ne!(key(&with_comma), key(&plain));
    }

    // --- VmRange arithmetic safety at i64 boundaries ---
    //
    // These cover the saturating/checked arithmetic in `VmRange::len`,
    // `VmRange::get` and `VmRange::to_vec`. Before the saturating rewrite the
    // inclusive `i64::MIN to 0` case panicked in debug builds on
    // `(end - start) + 1`.

    #[test]
    fn vm_range_len_inclusive_saturates_at_i64_max() {
        // The true width overflows i64; saturating at i64::MAX keeps `len` total.
        assert_eq!(range(i64::MIN, 0, true).len(), i64::MAX);
    }

    #[test]
    fn vm_range_len_exclusive_full_range_saturates() {
        assert_eq!(range(i64::MIN, i64::MAX, false).len(), i64::MAX);
    }

    #[test]
    fn vm_range_len_inclusive_full_range_saturates() {
        assert_eq!(range(i64::MIN, i64::MAX, true).len(), i64::MAX);
    }

    #[test]
    fn vm_range_get_near_max_does_not_overflow() {
        let top = range(i64::MAX - 2, i64::MAX, true);
        assert_eq!(top.len(), 3);
        assert_eq!(top.get(0), Some(i64::MAX - 2));
        assert_eq!(top.get(2), Some(i64::MAX));
        assert_eq!(top.get(3), None);
    }

    #[test]
    fn vm_range_reversed_is_empty() {
        let reversed = range(5, 1, true);
        assert!(reversed.is_empty());
        assert_eq!(reversed.len(), 0);
        assert_eq!(reversed.first(), None);
        assert_eq!(reversed.last(), None);
    }

    #[test]
    fn vm_range_contains_near_bounds() {
        let closed = range(1, 5, true);
        assert!(closed.contains(1));
        assert!(closed.contains(5));
        assert!(!closed.contains(0));
        assert!(!closed.contains(6));

        let half_open = range(1, 5, false);
        assert!(half_open.contains(1));
        assert!(half_open.contains(4));
        assert!(!half_open.contains(5));
    }

    #[test]
    fn vm_range_to_vec_matches_direct_iteration() {
        let values = range(-2, 2, true).to_vec();
        assert_eq!(values.len(), 5);

        let ints: Vec<i64> = values
            .iter()
            .map(|value| match value {
                VmValue::Int(n) => *n,
                _ => panic!("non-int in range"),
            })
            .collect();
        assert_eq!(ints, vec![-2, -1, 0, 1, 2]);
    }
}
harn_vm/value.rs

harn_vm/
value.rs