harn_vm/
value.rs

1use std::collections::BTreeMap;
2use std::rc::Rc;
3use std::sync::atomic::{AtomicBool, AtomicI64, Ordering};
4use std::sync::Arc;
5use std::{cell::RefCell, path::PathBuf};
6
7use crate::chunk::CompiledFunction;
8use crate::mcp::VmMcpClientHandle;
9
/// An async builtin function for the VM.
///
/// The callable receives its arguments as an owned `Vec<VmValue>` and
/// returns a boxed, pinned future that resolves to
/// `Result<VmValue, VmError>`.
///
/// The alias is wrapped in `Rc` and the returned future carries no `Send`
/// bound, so values of this type cannot be moved across threads.
/// NOTE(review): presumably these futures are driven on a single-threaded /
/// local executor — confirm against the call sites.
pub type VmAsyncBuiltinFn = Rc<
    dyn Fn(
        Vec<VmValue>,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<VmValue, VmError>>>>,
>;
16
/// The raw join handle type for spawned tasks.
///
/// A finished task yields `Ok((value, text))` or a `VmError`.
/// NOTE(review): the meaning of the `String` half of the tuple is not
/// visible in this file (presumably captured output) — confirm with the
/// spawning code.
pub type VmJoinHandle = tokio::task::JoinHandle<Result<(VmValue, String), VmError>>;
19
/// A spawned async task handle with cancellation support.
///
/// Pairs the tokio join handle with a shared flag that can be flipped to
/// ask the task to stop.
pub struct VmTaskHandle {
    /// Join handle of the spawned task; awaiting it yields the task result.
    pub handle: VmJoinHandle,
    /// Cooperative cancellation token. Set to true to request graceful shutdown.
    pub cancel_token: Arc<AtomicBool>,
}
26
/// A channel handle for the VM (uses tokio mpsc).
///
/// Every field is behind an `Arc`, so cloning the handle produces another
/// reference to the same underlying channel rather than a new channel.
#[derive(Debug, Clone)]
pub struct VmChannelHandle {
    /// Channel name; this is what the value's display form shows
    /// (`<channel:NAME>`).
    pub name: String,
    /// Sending half of the tokio mpsc channel.
    pub sender: Arc<tokio::sync::mpsc::Sender<VmValue>>,
    /// Receiving half, guarded by an async mutex so it can be shared
    /// between clones of the handle.
    pub receiver: Arc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
    /// Shared "closed" flag.
    /// NOTE(review): who sets it and when is not visible here — confirm.
    pub closed: Arc<AtomicBool>,
}
35
/// An atomic integer handle for the VM.
///
/// Cloning the handle clones the `Arc`, so all clones observe and modify
/// the same 64-bit counter.
#[derive(Debug, Clone)]
pub struct VmAtomicHandle {
    /// The shared atomic cell holding the current integer value.
    pub value: Arc<AtomicI64>,
}
41
/// A lazy integer range — Python-style. Stores only `(start, end, inclusive)`
/// so the in-memory footprint is O(1) regardless of the range's length.
/// `len()`, indexing (`r[k]`), `.contains(x)`, `.first()`, `.last()` are all
/// O(1); direct iteration walks step-by-step without materializing a list.
///
/// Empty-range convention (Python-consistent):
/// - Inclusive empty when `start > end`.
/// - Exclusive empty when `start >= end`.
///
/// Descending / reversed ranges are NOT supported in v1: `5 to 1` is simply
/// empty. Authors who want reverse iteration should call `.to_list().reverse()`.
/// NOTE(review): the original wording said "negative" ranges; the methods on
/// this type place no restriction on negative bounds, so this presumably
/// meant descending ranges — confirm.
#[derive(Debug, Clone, Copy)]
pub struct VmRange {
    /// First value of the range (always included when the range is non-empty).
    pub start: i64,
    /// Upper bound; included only when `inclusive` is true.
    pub end: i64,
    /// Whether `end` itself belongs to the range.
    pub inclusive: bool,
}
59
60impl VmRange {
61    /// Number of elements this range yields.
62    ///
63    /// Uses saturating arithmetic so that pathological ranges near
64    /// `i64::MAX`/`i64::MIN` do not panic on overflow. Because a range's
65    /// element count must fit in `i64` the returned length saturates at
66    /// `i64::MAX` for ranges whose width exceeds that (e.g. `i64::MIN to
67    /// i64::MAX` inclusive). Callers that later narrow to `usize` for
68    /// allocation should still guard against huge lengths — see
69    /// `to_vec` / `get` for the indexable-range invariants.
70    pub fn len(&self) -> i64 {
71        if self.inclusive {
72            if self.start > self.end {
73                0
74            } else {
75                self.end.saturating_sub(self.start).saturating_add(1)
76            }
77        } else if self.start >= self.end {
78            0
79        } else {
80            self.end.saturating_sub(self.start)
81        }
82    }
83
84    pub fn is_empty(&self) -> bool {
85        self.len() == 0
86    }
87
88    /// Element at the given 0-based index, bounds-checked.
89    /// Returns `None` when out of bounds or when `start + idx` would
90    /// overflow (which can only happen when `len()` saturated).
91    pub fn get(&self, idx: i64) -> Option<i64> {
92        if idx < 0 || idx >= self.len() {
93            None
94        } else {
95            self.start.checked_add(idx)
96        }
97    }
98
99    /// First element or `None` when empty.
100    pub fn first(&self) -> Option<i64> {
101        if self.is_empty() {
102            None
103        } else {
104            Some(self.start)
105        }
106    }
107
108    /// Last element or `None` when empty.
109    pub fn last(&self) -> Option<i64> {
110        if self.is_empty() {
111            None
112        } else if self.inclusive {
113            Some(self.end)
114        } else {
115            Some(self.end - 1)
116        }
117    }
118
119    /// Whether `v` falls inside the range (O(1)).
120    pub fn contains(&self, v: i64) -> bool {
121        if self.is_empty() {
122            return false;
123        }
124        if self.inclusive {
125            v >= self.start && v <= self.end
126        } else {
127            v >= self.start && v < self.end
128        }
129    }
130
131    /// Materialize to a `Vec<VmValue>` — the explicit escape hatch.
132    ///
133    /// Uses `checked_add` on the per-element index so a range near
134    /// `i64::MAX` stops at the representable bound instead of panicking.
135    /// Callers should still treat a very long range as unwise to
136    /// materialize (the whole point of `VmRange` is to avoid this).
137    pub fn to_vec(&self) -> Vec<VmValue> {
138        let len = self.len();
139        if len <= 0 {
140            return Vec::new();
141        }
142        let cap = len as usize;
143        let mut out = Vec::with_capacity(cap);
144        for i in 0..len {
145            match self.start.checked_add(i) {
146                Some(v) => out.push(VmValue::Int(v)),
147                None => break,
148            }
149        }
150        out
151    }
152}
153
/// A generator object: lazily produces values via yield.
/// The generator body runs as a spawned task that sends values through a channel.
///
/// Both fields are `Rc`-shared, so clones of a `VmGenerator` refer to the
/// same completion flag and the same receiver.
#[derive(Debug, Clone)]
pub struct VmGenerator {
    /// Whether the generator has finished (returned or exhausted).
    pub done: Rc<std::cell::Cell<bool>>,
    /// Receiver end of the yield channel (generator sends values here).
    /// Wrapped in a shared async mutex so recv() can be called without holding
    /// a RefCell borrow across await points.
    pub receiver: Rc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
}
165
/// VM runtime value.
///
/// Collection and string payloads are `Rc`-wrapped, so cloning a value of
/// those variants bumps a reference count instead of copying the contents.
#[derive(Debug, Clone)]
pub enum VmValue {
    /// 64-bit signed integer.
    Int(i64),
    /// 64-bit float.
    Float(f64),
    /// Shared string.
    String(Rc<str>),
    /// Boolean.
    Bool(bool),
    /// The nil value.
    Nil,
    /// Shared list of values.
    List(Rc<Vec<VmValue>>),
    /// Shared dictionary with string keys, kept in sorted key order.
    Dict(Rc<BTreeMap<String, VmValue>>),
    /// A compiled closure together with its captured environment.
    Closure(Rc<VmClosure>),
    /// Reference to a registered builtin function, used when a builtin name is
    /// referenced as a value (e.g. `snake_dict.rekey(snake_to_camel)`). The
    /// contained string is the builtin's registered name.
    BuiltinRef(Rc<str>),
    /// A duration; the display code treats the payload as milliseconds.
    Duration(u64),
    /// An enum variant value, optionally carrying positional fields.
    EnumVariant {
        enum_name: String,
        variant: String,
        fields: Vec<VmValue>,
    },
    /// A struct instance with named fields.
    StructInstance {
        struct_name: String,
        fields: BTreeMap<String, VmValue>,
    },
    /// Identifier of a spawned task (displayed as `<task:ID>`).
    TaskHandle(String),
    /// Handle to a VM channel.
    Channel(VmChannelHandle),
    /// Handle to a shared atomic integer.
    Atomic(VmAtomicHandle),
    /// Handle to an MCP client.
    McpClient(VmMcpClientHandle),
    /// A set; the members are stored in a shared `Vec`.
    Set(Rc<Vec<VmValue>>),
    /// A generator object.
    Generator(VmGenerator),
    /// A lazy integer range.
    Range(VmRange),
    /// Lazy iterator handle. Single-pass, fused. See `crate::vm::iter::VmIter`.
    Iter(Rc<RefCell<crate::vm::iter::VmIter>>),
    /// Two-element pair value. Produced by `pair(a, b)`, yielded by the
    /// Dict iterator source, and (later) by `zip` / `enumerate` combinators.
    /// Accessed via `.first` / `.second`, and destructurable in
    /// `for (a, b) in ...` loops.
    Pair(Rc<(VmValue, VmValue)>),
}
206
/// A compiled closure value.
///
/// Bundles the compiled function body with everything needed to run it
/// later: the captured lexical environment and optional module-level
/// context.
#[derive(Debug, Clone)]
pub struct VmClosure {
    /// The compiled function this closure executes.
    pub func: CompiledFunction,
    /// The per-closure lexical snapshot (captured bindings from enclosing
    /// scopes).
    pub env: VmEnv,
    /// Source directory for this closure's originating module.
    /// When set, `render()` and other source-relative builtins resolve
    /// paths relative to this directory instead of the entry pipeline.
    pub source_dir: Option<PathBuf>,
    /// Module-local named functions that should resolve before builtin fallback.
    /// This lets selectively imported functions keep private sibling helpers
    /// without exporting them into the caller's environment.
    pub module_functions: Option<ModuleFunctionRegistry>,
    /// Shared, mutable module-level env: holds top-level `var` / `let`
    /// bindings declared at the module root (caches, counters, lazily
    /// initialized registries). All closures created from the same
    /// module import point at the same `Rc<RefCell<VmEnv>>`, so a
    /// mutation inside one function is visible to every other function
    /// in that module on subsequent calls. `closure.env` still holds
    /// the per-closure lexical snapshot (captured function args from
    /// enclosing scopes, etc.) and is unchanged by this — `module_state`
    /// is a separate lookup layer consulted after the local env and
    /// before globals. Created in `import_declarations` after the
    /// module's init chunk runs, so the initial values from `var x = ...`
    /// land in it.
    pub module_state: Option<ModuleState>,
}
234
/// Shared, mutable table of a module's named functions, keyed by function
/// name. `Rc<RefCell<..>>` lets every closure that holds the registry see
/// the same table.
pub type ModuleFunctionRegistry = Rc<RefCell<BTreeMap<String, Rc<VmClosure>>>>;
/// Shared, mutable module-level environment (see `VmClosure::module_state`).
pub type ModuleState = Rc<RefCell<VmEnv>>;
237
/// VM environment for variable storage.
///
/// Holds a stack of lexical scopes; lookups walk the stack from the
/// innermost (last) scope outwards.
#[derive(Debug, Clone)]
pub struct VmEnv {
    /// Scope stack, outermost first. `new()` seeds it with one root scope
    /// and `pop_scope` / `truncate_scopes` never remove that root.
    pub(crate) scopes: Vec<Scope>,
}
243
/// A single lexical scope: a sorted map from variable name to its current
/// value plus a flag recording whether the binding may be reassigned.
#[derive(Debug, Clone)]
pub(crate) struct Scope {
    pub(crate) vars: BTreeMap<String, (VmValue, bool)>, // (value, mutable)
}
248
249impl Default for VmEnv {
250    fn default() -> Self {
251        Self::new()
252    }
253}
254
255impl VmEnv {
256    pub fn new() -> Self {
257        Self {
258            scopes: vec![Scope {
259                vars: BTreeMap::new(),
260            }],
261        }
262    }
263
264    pub fn push_scope(&mut self) {
265        self.scopes.push(Scope {
266            vars: BTreeMap::new(),
267        });
268    }
269
270    pub fn pop_scope(&mut self) {
271        if self.scopes.len() > 1 {
272            self.scopes.pop();
273        }
274    }
275
276    pub fn scope_depth(&self) -> usize {
277        self.scopes.len()
278    }
279
280    pub fn truncate_scopes(&mut self, target_depth: usize) {
281        let min_depth = target_depth.max(1);
282        while self.scopes.len() > min_depth {
283            self.scopes.pop();
284        }
285    }
286
287    pub fn get(&self, name: &str) -> Option<VmValue> {
288        for scope in self.scopes.iter().rev() {
289            if let Some((val, _)) = scope.vars.get(name) {
290                return Some(val.clone());
291            }
292        }
293        None
294    }
295
296    pub fn define(&mut self, name: &str, value: VmValue, mutable: bool) -> Result<(), VmError> {
297        if let Some(scope) = self.scopes.last_mut() {
298            if let Some((_, existing_mutable)) = scope.vars.get(name) {
299                if !existing_mutable && !mutable {
300                    return Err(VmError::Runtime(format!(
301                        "Cannot redeclare immutable variable '{name}' in the same scope (use 'var' for mutable bindings)"
302                    )));
303                }
304            }
305            scope.vars.insert(name.to_string(), (value, mutable));
306        }
307        Ok(())
308    }
309
310    pub fn all_variables(&self) -> BTreeMap<String, VmValue> {
311        let mut vars = BTreeMap::new();
312        for scope in &self.scopes {
313            for (name, (value, _)) in &scope.vars {
314                vars.insert(name.clone(), value.clone());
315            }
316        }
317        vars
318    }
319
320    pub fn assign(&mut self, name: &str, value: VmValue) -> Result<(), VmError> {
321        for scope in self.scopes.iter_mut().rev() {
322            if let Some((_, mutable)) = scope.vars.get(name) {
323                if !mutable {
324                    return Err(VmError::ImmutableAssignment(name.to_string()));
325                }
326                scope.vars.insert(name.to_string(), (value, true));
327                return Ok(());
328            }
329        }
330        Err(VmError::UndefinedVariable(name.to_string()))
331    }
332}
333
/// Compute the Levenshtein edit distance between two strings.
///
/// The distance counts single-`char` insertions, deletions and
/// substitutions, so it is measured in Unicode scalar values, not bytes.
fn levenshtein(a: &str, b: &str) -> usize {
    let target: Vec<char> = b.chars().collect();
    // `row[j]` is the distance between the prefix of `a` consumed so far
    // and the first `j` chars of `b`; it starts as the row for the empty
    // prefix of `a`.
    let mut row: Vec<usize> = (0..=target.len()).collect();
    for (consumed, source_ch) in a.chars().enumerate() {
        // `diagonal` carries the previous row's value one column to the left.
        let mut diagonal = row[0];
        row[0] = consumed + 1;
        for (col, &target_ch) in target.iter().enumerate() {
            let above = row[col + 1];
            let substitute = diagonal + usize::from(source_ch != target_ch);
            row[col + 1] = (above + 1).min(row[col] + 1).min(substitute);
            diagonal = above;
        }
    }
    row[target.len()]
}
352
353/// Find the closest match from a list of candidates using Levenshtein distance.
354/// Returns `Some(suggestion)` if a candidate is within `max_dist` edits.
355pub fn closest_match<'a>(name: &str, candidates: impl Iterator<Item = &'a str>) -> Option<String> {
356    let max_dist = match name.len() {
357        0..=2 => 1,
358        3..=5 => 2,
359        _ => 3,
360    };
361    candidates
362        .filter(|c| *c != name && !c.starts_with("__"))
363        .map(|c| (c, levenshtein(name, c)))
364        .filter(|(_, d)| *d <= max_dist)
365        // Prefer smallest distance, then closest length to original, then alphabetical
366        .min_by(|(a, da), (b, db)| {
367            da.cmp(db)
368                .then_with(|| {
369                    let a_diff = (a.len() as isize - name.len() as isize).unsigned_abs();
370                    let b_diff = (b.len() as isize - name.len() as isize).unsigned_abs();
371                    a_diff.cmp(&b_diff)
372                })
373                .then_with(|| a.cmp(b))
374        })
375        .map(|(c, _)| c.to_string())
376}
377
/// VM runtime errors.
///
/// Besides genuine failures this enum also carries values travelling up
/// the call stack: user-thrown values (`Thrown`) and `Return`.
#[derive(Debug, Clone)]
pub enum VmError {
    /// The operand stack was popped while empty.
    StackUnderflow,
    /// Too many nested calls.
    StackOverflow,
    /// A variable name could not be resolved; carries the name.
    UndefinedVariable(String),
    /// A builtin name could not be resolved; carries the name.
    UndefinedBuiltin(String),
    /// Assignment to an immutable binding; carries the binding name.
    ImmutableAssignment(String),
    /// A type mismatch, with a descriptive message.
    TypeError(String),
    /// A generic runtime failure, with a descriptive message.
    Runtime(String),
    /// Division by zero.
    DivisionByZero,
    /// A value thrown by user code.
    Thrown(VmValue),
    /// Thrown with error category for structured error handling.
    CategorizedError {
        message: String,
        category: ErrorCategory,
    },
    /// Carries a function's return value.
    /// NOTE(review): presumably used as control flow rather than as a real
    /// error — confirm against the interpreter loop.
    Return(VmValue),
    /// An unknown opcode byte was encountered.
    InvalidInstruction(u8),
}
397
/// Error categories for structured error handling in agent orchestration.
///
/// Each category has a stable snake_case name (see [`ErrorCategory::as_str`]
/// and [`ErrorCategory::parse`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ErrorCategory {
    /// Network/connection timeout
    Timeout,
    /// Authentication/authorization failure
    Auth,
    /// Rate limit exceeded (HTTP 429 / quota)
    RateLimit,
    /// Upstream provider is overloaded (HTTP 503 / 529).
    /// Distinct from RateLimit: the client hasn't exceeded a quota — the
    /// provider is shedding load and will recover on its own.
    Overloaded,
    /// Provider-side 5xx error (500, 502) that isn't specifically overload.
    ServerError,
    /// Network-level transient failure (connection reset, DNS hiccup,
    /// partial stream) — retryable but not provider-status-coded.
    TransientNetwork,
    /// LLM output failed schema validation. Retryable via `schema_retries`.
    SchemaValidation,
    /// Tool execution failure
    ToolError,
    /// Tool was rejected by the host (not permitted / not in allowlist)
    ToolRejected,
    /// Operation was cancelled
    Cancelled,
    /// Resource not found
    NotFound,
    /// Circuit breaker is open
    CircuitOpen,
    /// Generic/unclassified error
    Generic,
}

impl ErrorCategory {
    /// The category's stable snake_case name.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Timeout => "timeout",
            Self::Auth => "auth",
            Self::RateLimit => "rate_limit",
            Self::Overloaded => "overloaded",
            Self::ServerError => "server_error",
            Self::TransientNetwork => "transient_network",
            Self::SchemaValidation => "schema_validation",
            Self::ToolError => "tool_error",
            Self::ToolRejected => "tool_rejected",
            Self::Cancelled => "cancelled",
            Self::NotFound => "not_found",
            Self::CircuitOpen => "circuit_open",
            Self::Generic => "generic",
        }
    }

    /// Inverse of [`ErrorCategory::as_str`]. Matching is exact and
    /// case-sensitive; any unrecognized name maps to `Generic`.
    pub fn parse(s: &str) -> Self {
        match s {
            "timeout" => Self::Timeout,
            "auth" => Self::Auth,
            "rate_limit" => Self::RateLimit,
            "overloaded" => Self::Overloaded,
            "server_error" => Self::ServerError,
            "transient_network" => Self::TransientNetwork,
            "schema_validation" => Self::SchemaValidation,
            "tool_error" => Self::ToolError,
            "tool_rejected" => Self::ToolRejected,
            "cancelled" => Self::Cancelled,
            "not_found" => Self::NotFound,
            "circuit_open" => Self::CircuitOpen,
            _ => Self::Generic,
        }
    }

    /// Whether an error of this category is worth retrying for a transient
    /// provider-side reason. Agent loops consult this to decide whether to
    /// back off and retry vs surface the error to the user.
    pub fn is_transient(&self) -> bool {
        match self {
            Self::Timeout
            | Self::RateLimit
            | Self::Overloaded
            | Self::ServerError
            | Self::TransientNetwork => true,
            Self::Auth
            | Self::SchemaValidation
            | Self::ToolError
            | Self::ToolRejected
            | Self::Cancelled
            | Self::NotFound
            | Self::CircuitOpen
            | Self::Generic => false,
        }
    }
}
483
484/// Create a categorized error conveniently.
485pub fn categorized_error(message: impl Into<String>, category: ErrorCategory) -> VmError {
486    VmError::CategorizedError {
487        message: message.into(),
488        category,
489    }
490}
491
492/// Extract error category from a VmError.
493///
494/// Classification priority:
495/// 1. Explicit CategorizedError variant (set by throw_error or internal code)
496/// 2. Thrown dict with a "category" field (user-created structured errors)
497/// 3. HTTP status code extraction (standard, unambiguous)
498/// 4. Deadline exceeded (VM-internal)
499/// 5. Fallback to Generic
500pub fn error_to_category(err: &VmError) -> ErrorCategory {
501    match err {
502        VmError::CategorizedError { category, .. } => category.clone(),
503        VmError::Thrown(VmValue::Dict(d)) => d
504            .get("category")
505            .map(|v| ErrorCategory::parse(&v.display()))
506            .unwrap_or(ErrorCategory::Generic),
507        VmError::Thrown(VmValue::String(s)) => classify_error_message(s),
508        VmError::Runtime(msg) => classify_error_message(msg),
509        _ => ErrorCategory::Generic,
510    }
511}
512
513/// Classify an error message using HTTP status codes and well-known patterns.
514/// Prefers unambiguous signals (status codes) over substring heuristics.
515pub fn classify_error_message(msg: &str) -> ErrorCategory {
516    // 1. HTTP status codes — most reliable signal
517    if let Some(cat) = classify_by_http_status(msg) {
518        return cat;
519    }
520    // 2. Well-known error identifiers from major APIs
521    //    (Anthropic, OpenAI, and standard HTTP patterns)
522    if msg.contains("Deadline exceeded") || msg.contains("context deadline exceeded") {
523        return ErrorCategory::Timeout;
524    }
525    if msg.contains("overloaded_error") {
526        // Anthropic overloaded_error surfaces as HTTP 529.
527        return ErrorCategory::Overloaded;
528    }
529    if msg.contains("api_error") {
530        // Anthropic catch-all server-side error.
531        return ErrorCategory::ServerError;
532    }
533    if msg.contains("insufficient_quota") || msg.contains("billing_hard_limit_reached") {
534        // OpenAI-specific quota error types.
535        return ErrorCategory::RateLimit;
536    }
537    if msg.contains("invalid_api_key") || msg.contains("authentication_error") {
538        return ErrorCategory::Auth;
539    }
540    if msg.contains("not_found_error") || msg.contains("model_not_found") {
541        return ErrorCategory::NotFound;
542    }
543    if msg.contains("circuit_open") {
544        return ErrorCategory::CircuitOpen;
545    }
546    // Network-level transient patterns (pre-HTTP-status, pre-provider-framing).
547    let lower = msg.to_lowercase();
548    if lower.contains("connection reset")
549        || lower.contains("connection refused")
550        || lower.contains("connection closed")
551        || lower.contains("broken pipe")
552        || lower.contains("dns error")
553        || lower.contains("stream error")
554        || lower.contains("unexpected eof")
555    {
556        return ErrorCategory::TransientNetwork;
557    }
558    ErrorCategory::Generic
559}
560
561/// Classify errors by HTTP status code if one appears in the message.
562/// This is the most reliable classification method since status codes
563/// are standardized (RFC 9110) and unambiguous.
564fn classify_by_http_status(msg: &str) -> Option<ErrorCategory> {
565    // Extract 3-digit HTTP status codes from common patterns:
566    // "HTTP 429", "status 429", "429 Too Many", "error: 401"
567    for code in extract_http_status_codes(msg) {
568        return Some(match code {
569            401 | 403 => ErrorCategory::Auth,
570            404 | 410 => ErrorCategory::NotFound,
571            408 | 504 | 522 | 524 => ErrorCategory::Timeout,
572            429 => ErrorCategory::RateLimit,
573            503 | 529 => ErrorCategory::Overloaded,
574            500 | 502 => ErrorCategory::ServerError,
575            _ => continue,
576        });
577    }
578    None
579}
580
/// Extract plausible HTTP error status codes from an error message.
///
/// A code is a run of exactly three ASCII digits that is not part of a
/// longer number and whose value lies in `400..=599`. Codes are returned
/// in the order they appear in `msg`.
fn extract_http_status_codes(msg: &str) -> Vec<u16> {
    // Splitting on every non-digit leaves the maximal digit runs, so a run
    // of length three is by construction not embedded in a longer number.
    msg.split(|ch: char| !ch.is_ascii_digit())
        .filter(|run| run.len() == 3)
        .filter_map(|run| run.parse::<u16>().ok())
        .filter(|status| (400..=599).contains(status))
        .collect()
}
605
606impl std::fmt::Display for VmError {
607    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
608        match self {
609            VmError::StackUnderflow => write!(f, "Stack underflow"),
610            VmError::StackOverflow => write!(f, "Stack overflow: too many nested calls"),
611            VmError::UndefinedVariable(n) => write!(f, "Undefined variable: {n}"),
612            VmError::UndefinedBuiltin(n) => write!(f, "Undefined builtin: {n}"),
613            VmError::ImmutableAssignment(n) => {
614                write!(f, "Cannot assign to immutable binding: {n}")
615            }
616            VmError::TypeError(msg) => write!(f, "Type error: {msg}"),
617            VmError::Runtime(msg) => write!(f, "Runtime error: {msg}"),
618            VmError::DivisionByZero => write!(f, "Division by zero"),
619            VmError::Thrown(v) => write!(f, "Thrown: {}", v.display()),
620            VmError::CategorizedError { message, category } => {
621                write!(f, "Error [{}]: {}", category.as_str(), message)
622            }
623            VmError::Return(_) => write!(f, "Return from function"),
624            VmError::InvalidInstruction(op) => write!(f, "Invalid instruction: 0x{op:02x}"),
625        }
626    }
627}
628
// Marker impl: lets `VmError` be used wherever a `std::error::Error` is
// expected. No source chaining is provided; the trait's default methods
// are used as-is.
impl std::error::Error for VmError {}
630
631impl VmValue {
632    pub fn is_truthy(&self) -> bool {
633        match self {
634            VmValue::Bool(b) => *b,
635            VmValue::Nil => false,
636            VmValue::Int(n) => *n != 0,
637            VmValue::Float(n) => *n != 0.0,
638            VmValue::String(s) => !s.is_empty(),
639            VmValue::List(l) => !l.is_empty(),
640            VmValue::Dict(d) => !d.is_empty(),
641            VmValue::Closure(_) => true,
642            VmValue::BuiltinRef(_) => true,
643            VmValue::Duration(ms) => *ms > 0,
644            VmValue::EnumVariant { .. } => true,
645            VmValue::StructInstance { .. } => true,
646            VmValue::TaskHandle(_) => true,
647            VmValue::Channel(_) => true,
648            VmValue::Atomic(_) => true,
649            VmValue::McpClient(_) => true,
650            VmValue::Set(s) => !s.is_empty(),
651            VmValue::Generator(_) => true,
652            // Match Python semantics: range objects are always truthy,
653            // even the empty range (analogous to generators / iterators).
654            VmValue::Range(_) => true,
655            VmValue::Iter(_) => true,
656            VmValue::Pair(_) => true,
657        }
658    }
659
660    pub fn type_name(&self) -> &'static str {
661        match self {
662            VmValue::String(_) => "string",
663            VmValue::Int(_) => "int",
664            VmValue::Float(_) => "float",
665            VmValue::Bool(_) => "bool",
666            VmValue::Nil => "nil",
667            VmValue::List(_) => "list",
668            VmValue::Dict(_) => "dict",
669            VmValue::Closure(_) => "closure",
670            VmValue::BuiltinRef(_) => "builtin",
671            VmValue::Duration(_) => "duration",
672            VmValue::EnumVariant { .. } => "enum",
673            VmValue::StructInstance { .. } => "struct",
674            VmValue::TaskHandle(_) => "task_handle",
675            VmValue::Channel(_) => "channel",
676            VmValue::Atomic(_) => "atomic",
677            VmValue::McpClient(_) => "mcp_client",
678            VmValue::Set(_) => "set",
679            VmValue::Generator(_) => "generator",
680            VmValue::Range(_) => "range",
681            VmValue::Iter(_) => "iter",
682            VmValue::Pair(_) => "pair",
683        }
684    }
685
686    pub fn display(&self) -> String {
687        let mut out = String::new();
688        self.write_display(&mut out);
689        out
690    }
691
692    /// Writes the display representation directly into `out`,
693    /// avoiding intermediate Vec<String> allocations for collections.
694    pub fn write_display(&self, out: &mut String) {
695        use std::fmt::Write;
696        match self {
697            VmValue::Int(n) => {
698                let _ = write!(out, "{n}");
699            }
700            VmValue::Float(n) => {
701                if *n == (*n as i64) as f64 && n.abs() < 1e15 {
702                    let _ = write!(out, "{n:.1}");
703                } else {
704                    let _ = write!(out, "{n}");
705                }
706            }
707            VmValue::String(s) => out.push_str(s),
708            VmValue::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
709            VmValue::Nil => out.push_str("nil"),
710            VmValue::List(items) => {
711                out.push('[');
712                for (i, item) in items.iter().enumerate() {
713                    if i > 0 {
714                        out.push_str(", ");
715                    }
716                    item.write_display(out);
717                }
718                out.push(']');
719            }
720            VmValue::Dict(map) => {
721                out.push('{');
722                for (i, (k, v)) in map.iter().enumerate() {
723                    if i > 0 {
724                        out.push_str(", ");
725                    }
726                    out.push_str(k);
727                    out.push_str(": ");
728                    v.write_display(out);
729                }
730                out.push('}');
731            }
732            VmValue::Closure(c) => {
733                let _ = write!(out, "<fn({})>", c.func.params.join(", "));
734            }
735            VmValue::BuiltinRef(name) => {
736                let _ = write!(out, "<builtin {name}>");
737            }
738            VmValue::Duration(ms) => {
739                if *ms >= 3_600_000 && ms % 3_600_000 == 0 {
740                    let _ = write!(out, "{}h", ms / 3_600_000);
741                } else if *ms >= 60_000 && ms % 60_000 == 0 {
742                    let _ = write!(out, "{}m", ms / 60_000);
743                } else if *ms >= 1000 && ms % 1000 == 0 {
744                    let _ = write!(out, "{}s", ms / 1000);
745                } else {
746                    let _ = write!(out, "{}ms", ms);
747                }
748            }
749            VmValue::EnumVariant {
750                enum_name,
751                variant,
752                fields,
753            } => {
754                if fields.is_empty() {
755                    let _ = write!(out, "{enum_name}.{variant}");
756                } else {
757                    let _ = write!(out, "{enum_name}.{variant}(");
758                    for (i, v) in fields.iter().enumerate() {
759                        if i > 0 {
760                            out.push_str(", ");
761                        }
762                        v.write_display(out);
763                    }
764                    out.push(')');
765                }
766            }
767            VmValue::StructInstance {
768                struct_name,
769                fields,
770            } => {
771                let _ = write!(out, "{struct_name} {{");
772                for (i, (k, v)) in fields.iter().enumerate() {
773                    if i > 0 {
774                        out.push_str(", ");
775                    }
776                    out.push_str(k);
777                    out.push_str(": ");
778                    v.write_display(out);
779                }
780                out.push('}');
781            }
782            VmValue::TaskHandle(id) => {
783                let _ = write!(out, "<task:{id}>");
784            }
785            VmValue::Channel(ch) => {
786                let _ = write!(out, "<channel:{}>", ch.name);
787            }
788            VmValue::Atomic(a) => {
789                let _ = write!(out, "<atomic:{}>", a.value.load(Ordering::SeqCst));
790            }
791            VmValue::McpClient(c) => {
792                let _ = write!(out, "<mcp_client:{}>", c.name);
793            }
794            VmValue::Set(items) => {
795                out.push_str("set(");
796                for (i, item) in items.iter().enumerate() {
797                    if i > 0 {
798                        out.push_str(", ");
799                    }
800                    item.write_display(out);
801                }
802                out.push(')');
803            }
804            VmValue::Generator(g) => {
805                if g.done.get() {
806                    out.push_str("<generator (done)>");
807                } else {
808                    out.push_str("<generator>");
809                }
810            }
811            // Print form mirrors source syntax: `1 to 5` / `0 to 3 exclusive`.
812            // `.to_list()` is the explicit path to materialize for display.
813            VmValue::Range(r) => {
814                let _ = write!(out, "{} to {}", r.start, r.end);
815                if !r.inclusive {
816                    out.push_str(" exclusive");
817                }
818            }
819            VmValue::Iter(h) => {
820                if matches!(&*h.borrow(), crate::vm::iter::VmIter::Exhausted) {
821                    out.push_str("<iter (exhausted)>");
822                } else {
823                    out.push_str("<iter>");
824                }
825            }
826            VmValue::Pair(p) => {
827                out.push('(');
828                p.0.write_display(out);
829                out.push_str(", ");
830                p.1.write_display(out);
831                out.push(')');
832            }
833        }
834    }
835
836    /// Get the value as a BTreeMap reference, if it's a Dict.
837    pub fn as_dict(&self) -> Option<&BTreeMap<String, VmValue>> {
838        if let VmValue::Dict(d) = self {
839            Some(d)
840        } else {
841            None
842        }
843    }
844
845    pub fn as_int(&self) -> Option<i64> {
846        if let VmValue::Int(n) = self {
847            Some(*n)
848        } else {
849            None
850        }
851    }
852}
853
/// Sync builtin function for the VM.
///
/// A reference-counted (`Rc`) callable that receives the argument values as
/// a slice together with a mutable `String`, and returns either the result
/// value or a `VmError`.
// NOTE(review): the `&mut String` parameter is presumably the VM's output
// buffer that builtins append to — confirm against the call sites.
pub type VmBuiltinFn = Rc<dyn Fn(&[VmValue], &mut String) -> Result<VmValue, VmError>>;
856
857/// Reference / identity equality. For heap-allocated refcounted values
858/// (List/Dict/Set/Closure) returns true only when both operands share the
859/// same underlying `Rc` allocation. For primitive scalars, falls back to
860/// structural equality (since primitives have no distinct identity).
861pub fn values_identical(a: &VmValue, b: &VmValue) -> bool {
862    match (a, b) {
863        (VmValue::List(x), VmValue::List(y)) => Rc::ptr_eq(x, y),
864        (VmValue::Dict(x), VmValue::Dict(y)) => Rc::ptr_eq(x, y),
865        (VmValue::Set(x), VmValue::Set(y)) => Rc::ptr_eq(x, y),
866        (VmValue::Closure(x), VmValue::Closure(y)) => Rc::ptr_eq(x, y),
867        (VmValue::String(x), VmValue::String(y)) => Rc::ptr_eq(x, y) || x == y,
868        (VmValue::BuiltinRef(x), VmValue::BuiltinRef(y)) => x == y,
869        (VmValue::Pair(x), VmValue::Pair(y)) => Rc::ptr_eq(x, y),
870        // Primitives: identity collapses to structural equality.
871        _ => values_equal(a, b),
872    }
873}
874
875/// Stable identity key for a value. Different allocations produce different
876/// keys; two values with the same heap identity produce the same key. For
877/// primitives the key is derived from the displayed value plus type name so
878/// logically-equal primitives always compare equal.
879pub fn value_identity_key(v: &VmValue) -> String {
880    match v {
881        VmValue::List(x) => format!("list@{:p}", Rc::as_ptr(x)),
882        VmValue::Dict(x) => format!("dict@{:p}", Rc::as_ptr(x)),
883        VmValue::Set(x) => format!("set@{:p}", Rc::as_ptr(x)),
884        VmValue::Closure(x) => format!("closure@{:p}", Rc::as_ptr(x)),
885        VmValue::String(x) => format!("string@{:p}", x.as_ptr()),
886        VmValue::BuiltinRef(name) => format!("builtin@{name}"),
887        other => format!("{}@{}", other.type_name(), other.display()),
888    }
889}
890
891/// Canonical string form used as the keying material for `hash_value`.
892/// Different types never collide (the type name is prepended) and collection
893/// order is preserved so structurally-equal values always produce the same
894/// key. Not intended for cross-process stability; depends on the in-process
895/// iteration order for collections (Dict uses BTreeMap so keys are sorted).
896pub fn value_structural_hash_key(v: &VmValue) -> String {
897    let mut out = String::new();
898    write_structural_hash_key(v, &mut out);
899    out
900}
901
902/// Writes the structural hash key for a value directly into `out`,
903/// avoiding intermediate allocations. Uses length-prefixed encoding
904/// for strings and dict keys to prevent separator collisions.
905fn write_structural_hash_key(v: &VmValue, out: &mut String) {
906    match v {
907        VmValue::Nil => out.push('N'),
908        VmValue::Bool(b) => {
909            out.push(if *b { 'T' } else { 'F' });
910        }
911        VmValue::Int(n) => {
912            out.push('i');
913            out.push_str(&n.to_string());
914            out.push(';');
915        }
916        VmValue::Float(n) => {
917            out.push('f');
918            out.push_str(&n.to_bits().to_string());
919            out.push(';');
920        }
921        VmValue::String(s) => {
922            // Length-prefixed: s<len>:<content> — no ambiguity from content
923            out.push('s');
924            out.push_str(&s.len().to_string());
925            out.push(':');
926            out.push_str(s);
927        }
928        VmValue::Duration(ms) => {
929            out.push('d');
930            out.push_str(&ms.to_string());
931            out.push(';');
932        }
933        VmValue::List(items) => {
934            out.push('L');
935            for item in items.iter() {
936                write_structural_hash_key(item, out);
937                out.push(',');
938            }
939            out.push(']');
940        }
941        VmValue::Dict(map) => {
942            out.push('D');
943            for (k, v) in map.iter() {
944                // Length-prefixed key
945                out.push_str(&k.len().to_string());
946                out.push(':');
947                out.push_str(k);
948                out.push('=');
949                write_structural_hash_key(v, out);
950                out.push(',');
951            }
952            out.push('}');
953        }
954        VmValue::Set(items) => {
955            // Sets need sorted keys for order-independence
956            let mut keys: Vec<String> = items.iter().map(value_structural_hash_key).collect();
957            keys.sort();
958            out.push('S');
959            for k in &keys {
960                out.push_str(k);
961                out.push(',');
962            }
963            out.push('}');
964        }
965        other => {
966            let tn = other.type_name();
967            out.push('o');
968            out.push_str(&tn.len().to_string());
969            out.push(':');
970            out.push_str(tn);
971            let d = other.display();
972            out.push_str(&d.len().to_string());
973            out.push(':');
974            out.push_str(&d);
975        }
976    }
977}
978
979pub fn values_equal(a: &VmValue, b: &VmValue) -> bool {
980    match (a, b) {
981        (VmValue::Int(x), VmValue::Int(y)) => x == y,
982        (VmValue::Float(x), VmValue::Float(y)) => x == y,
983        (VmValue::String(x), VmValue::String(y)) => x == y,
984        (VmValue::Bool(x), VmValue::Bool(y)) => x == y,
985        (VmValue::Nil, VmValue::Nil) => true,
986        (VmValue::Int(x), VmValue::Float(y)) => (*x as f64) == *y,
987        (VmValue::Float(x), VmValue::Int(y)) => *x == (*y as f64),
988        (VmValue::TaskHandle(a), VmValue::TaskHandle(b)) => a == b,
989        (VmValue::Channel(_), VmValue::Channel(_)) => false, // channels are never equal
990        (VmValue::Atomic(a), VmValue::Atomic(b)) => {
991            a.value.load(Ordering::SeqCst) == b.value.load(Ordering::SeqCst)
992        }
993        (VmValue::List(a), VmValue::List(b)) => {
994            a.len() == b.len() && a.iter().zip(b.iter()).all(|(x, y)| values_equal(x, y))
995        }
996        (VmValue::Dict(a), VmValue::Dict(b)) => {
997            a.len() == b.len()
998                && a.iter()
999                    .zip(b.iter())
1000                    .all(|((k1, v1), (k2, v2))| k1 == k2 && values_equal(v1, v2))
1001        }
1002        (
1003            VmValue::EnumVariant {
1004                enum_name: a_e,
1005                variant: a_v,
1006                fields: a_f,
1007            },
1008            VmValue::EnumVariant {
1009                enum_name: b_e,
1010                variant: b_v,
1011                fields: b_f,
1012            },
1013        ) => {
1014            a_e == b_e
1015                && a_v == b_v
1016                && a_f.len() == b_f.len()
1017                && a_f.iter().zip(b_f.iter()).all(|(x, y)| values_equal(x, y))
1018        }
1019        (
1020            VmValue::StructInstance {
1021                struct_name: a_s,
1022                fields: a_f,
1023            },
1024            VmValue::StructInstance {
1025                struct_name: b_s,
1026                fields: b_f,
1027            },
1028        ) => {
1029            a_s == b_s
1030                && a_f.len() == b_f.len()
1031                && a_f
1032                    .iter()
1033                    .zip(b_f.iter())
1034                    .all(|((k1, v1), (k2, v2))| k1 == k2 && values_equal(v1, v2))
1035        }
1036        (VmValue::Set(a), VmValue::Set(b)) => {
1037            a.len() == b.len() && a.iter().all(|x| b.iter().any(|y| values_equal(x, y)))
1038        }
1039        (VmValue::Generator(_), VmValue::Generator(_)) => false, // generators are never equal
1040        (VmValue::Range(a), VmValue::Range(b)) => {
1041            a.start == b.start && a.end == b.end && a.inclusive == b.inclusive
1042        }
1043        (VmValue::Iter(a), VmValue::Iter(b)) => Rc::ptr_eq(a, b),
1044        (VmValue::Pair(a), VmValue::Pair(b)) => {
1045            values_equal(&a.0, &b.0) && values_equal(&a.1, &b.1)
1046        }
1047        _ => false,
1048    }
1049}
1050
1051pub fn compare_values(a: &VmValue, b: &VmValue) -> i32 {
1052    match (a, b) {
1053        (VmValue::Int(x), VmValue::Int(y)) => x.cmp(y) as i32,
1054        (VmValue::Float(x), VmValue::Float(y)) => {
1055            if x < y {
1056                -1
1057            } else if x > y {
1058                1
1059            } else {
1060                0
1061            }
1062        }
1063        (VmValue::Int(x), VmValue::Float(y)) => {
1064            let x = *x as f64;
1065            if x < *y {
1066                -1
1067            } else if x > *y {
1068                1
1069            } else {
1070                0
1071            }
1072        }
1073        (VmValue::Float(x), VmValue::Int(y)) => {
1074            let y = *y as f64;
1075            if *x < y {
1076                -1
1077            } else if *x > y {
1078                1
1079            } else {
1080                0
1081            }
1082        }
1083        (VmValue::String(x), VmValue::String(y)) => x.cmp(y) as i32,
1084        (VmValue::Pair(x), VmValue::Pair(y)) => {
1085            let c = compare_values(&x.0, &y.0);
1086            if c != 0 {
1087                c
1088            } else {
1089                compare_values(&x.1, &y.1)
1090            }
1091        }
1092        _ => 0,
1093    }
1094}
1095
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a string value.
    fn s(val: &str) -> VmValue {
        VmValue::String(Rc::from(val))
    }

    /// Builds an integer value.
    fn i(val: i64) -> VmValue {
        VmValue::Int(val)
    }

    /// Builds a list value from its elements.
    fn list(items: Vec<VmValue>) -> VmValue {
        VmValue::List(Rc::new(items))
    }

    /// Builds a dict value from `(key, value)` pairs.
    fn dict(pairs: Vec<(&str, VmValue)>) -> VmValue {
        VmValue::Dict(Rc::new(
            pairs.into_iter().map(|(k, v)| (k.to_string(), v)).collect(),
        ))
    }

    /// Builds a range without repeating the struct literal in every test.
    fn range(start: i64, end: i64, inclusive: bool) -> VmRange {
        VmRange {
            start,
            end,
            inclusive,
        }
    }

    #[test]
    fn hash_key_cross_type_distinct() {
        // Int(1) vs String("1") vs Bool(true) must all differ
        let k_int = value_structural_hash_key(&i(1));
        let k_str = value_structural_hash_key(&s("1"));
        let k_bool = value_structural_hash_key(&VmValue::Bool(true));
        assert_ne!(k_int, k_str);
        assert_ne!(k_int, k_bool);
        assert_ne!(k_str, k_bool);
    }

    #[test]
    fn hash_key_string_with_separator_chars() {
        // ["a,string:b"] (1-element list) vs ["a", "b"] (2-element list)
        let one_elem = list(vec![s("a,string:b")]);
        let two_elem = list(vec![s("a"), s("b")]);
        assert_ne!(
            value_structural_hash_key(&one_elem),
            value_structural_hash_key(&two_elem),
            "length-prefixed strings must prevent separator collisions"
        );
    }

    #[test]
    fn hash_key_dict_key_with_equals() {
        // A key containing '=' must not be confused with a shorter key...
        let key_with_equals = dict(vec![("a=b", i(1))]);
        let short_key = dict(vec![("a", i(1))]);
        assert_ne!(
            value_structural_hash_key(&key_with_equals),
            value_structural_hash_key(&short_key)
        );
        // ...nor with key "a" whose value starts with "b".
        let short_key_b_value = dict(vec![("a", s("b"))]);
        assert_ne!(
            value_structural_hash_key(&key_with_equals),
            value_structural_hash_key(&short_key_b_value)
        );
    }

    #[test]
    fn hash_key_nested_list_vs_flat() {
        // [[1]] vs [1]
        let nested = list(vec![list(vec![i(1)])]);
        let flat = list(vec![i(1)]);
        assert_ne!(
            value_structural_hash_key(&nested),
            value_structural_hash_key(&flat)
        );
    }

    #[test]
    fn hash_key_nil() {
        let nil_key = value_structural_hash_key(&VmValue::Nil);
        // Deterministic across calls...
        assert_eq!(nil_key, value_structural_hash_key(&VmValue::Nil));
        // ...and distinct from other "empty" or falsy values.
        assert_ne!(nil_key, value_structural_hash_key(&VmValue::Bool(false)));
        assert_ne!(nil_key, value_structural_hash_key(&i(0)));
        assert_ne!(nil_key, value_structural_hash_key(&s("")));
        assert_ne!(nil_key, value_structural_hash_key(&list(vec![])));
    }

    #[test]
    fn hash_key_float_zero_vs_neg_zero() {
        let pos = VmValue::Float(0.0);
        let neg = VmValue::Float(-0.0);
        // 0.0 and -0.0 have different bit representations
        assert_ne!(
            value_structural_hash_key(&pos),
            value_structural_hash_key(&neg)
        );
    }

    #[test]
    fn hash_key_equal_values_match() {
        let a = list(vec![s("hello"), i(42), VmValue::Bool(false)]);
        let b = list(vec![s("hello"), i(42), VmValue::Bool(false)]);
        assert_eq!(value_structural_hash_key(&a), value_structural_hash_key(&b));
    }

    #[test]
    fn hash_key_dict_with_comma_key() {
        let d1 = dict(vec![("a,b", i(1))]);
        let d2 = dict(vec![("a", i(1))]);
        assert_ne!(
            value_structural_hash_key(&d1),
            value_structural_hash_key(&d2)
        );
    }

    // --- VmRange arithmetic safety at i64 boundaries ---
    //
    // These guard the saturating/checked arithmetic in `VmRange::len` and
    // `VmRange::get` / `VmRange::to_vec`. Before the saturating rewrite the
    // inclusive `i64::MIN to 0` case panicked in debug builds on
    // `(end - start) + 1`.

    #[test]
    fn vm_range_len_inclusive_saturates_at_i64_max() {
        let r = range(i64::MIN, 0, true);
        // True width overflows i64; saturating at i64::MAX keeps this total.
        assert_eq!(r.len(), i64::MAX);
    }

    #[test]
    fn vm_range_len_exclusive_full_range_saturates() {
        let r = range(i64::MIN, i64::MAX, false);
        assert_eq!(r.len(), i64::MAX);
    }

    #[test]
    fn vm_range_len_inclusive_full_range_saturates() {
        let r = range(i64::MIN, i64::MAX, true);
        assert_eq!(r.len(), i64::MAX);
    }

    #[test]
    fn vm_range_get_near_max_does_not_overflow() {
        let r = range(i64::MAX - 2, i64::MAX, true);
        assert_eq!(r.len(), 3);
        assert_eq!(r.get(0), Some(i64::MAX - 2));
        assert_eq!(r.get(2), Some(i64::MAX));
        assert_eq!(r.get(3), None);
    }

    #[test]
    fn vm_range_reversed_is_empty() {
        let r = range(5, 1, true);
        assert!(r.is_empty());
        assert_eq!(r.len(), 0);
        assert_eq!(r.first(), None);
        assert_eq!(r.last(), None);
    }

    #[test]
    fn vm_range_contains_near_bounds() {
        let inclusive = range(1, 5, true);
        assert!(inclusive.contains(1));
        assert!(inclusive.contains(5));
        assert!(!inclusive.contains(0));
        assert!(!inclusive.contains(6));

        let exclusive = range(1, 5, false);
        assert!(exclusive.contains(1));
        assert!(exclusive.contains(4));
        assert!(!exclusive.contains(5));
    }

    #[test]
    fn vm_range_to_vec_matches_direct_iteration() {
        let v = range(-2, 2, true).to_vec();
        assert_eq!(v.len(), 5);
        assert_eq!(
            v.iter()
                .map(|x| match x {
                    VmValue::Int(n) => *n,
                    _ => panic!("non-int in range"),
                })
                .collect::<Vec<_>>(),
            vec![-2, -1, 0, 1, 2]
        );
    }
}
harn_vm/value.rs

harn_vm/
value.rs