Skip to main content

harn_vm/value/
structural.rs

1use std::sync::atomic::Ordering;
2use std::sync::Arc;
3
4use super::recursion::guard_recursion;
5use super::VmValue;
6
7/// Reference / identity equality. For heap-allocated refcounted values
8/// (List/Dict/Set/Closure) returns true only when both operands share the
9/// same underlying shared allocation. For primitive scalars, falls back to
10/// structural equality (since primitives have no distinct identity).
11pub fn values_identical(a: &VmValue, b: &VmValue) -> bool {
12    match (a, b) {
13        (VmValue::List(x), VmValue::List(y)) => Arc::ptr_eq(x, y),
14        (VmValue::Dict(x), VmValue::Dict(y)) => Arc::ptr_eq(x, y),
15        (VmValue::Set(x), VmValue::Set(y)) => Arc::ptr_eq(x, y),
16        (VmValue::Closure(x), VmValue::Closure(y)) => Arc::ptr_eq(x, y),
17        (VmValue::String(x), VmValue::String(y)) => arcstr::ArcStr::ptr_eq(x, y) || x == y,
18        (VmValue::Bytes(x), VmValue::Bytes(y)) => Arc::ptr_eq(x, y) || x == y,
19        (VmValue::BuiltinRef(x), VmValue::BuiltinRef(y)) => x == y,
20        (VmValue::BuiltinRefId(x), VmValue::BuiltinRefId(y)) => x.name == y.name,
21        (VmValue::BuiltinRef(x), VmValue::BuiltinRefId(y))
22        | (VmValue::BuiltinRefId(y), VmValue::BuiltinRef(x)) => x.as_str() == y.name.as_str(),
23        (VmValue::Pair(x), VmValue::Pair(y)) => Arc::ptr_eq(x, y),
24        // Primitives: identity collapses to structural equality.
25        _ => values_equal(a, b),
26    }
27}
28
29/// Stable identity key for a value. Different allocations produce different
30/// keys; two values with the same heap identity produce the same key. For
31/// primitives the key is derived from the displayed value plus type name so
32/// logically-equal primitives always compare equal.
33pub fn value_identity_key(v: &VmValue) -> String {
34    match v {
35        VmValue::List(x) => format!("list@{:p}", Arc::as_ptr(x)),
36        VmValue::Dict(x) => format!("dict@{:p}", Arc::as_ptr(x)),
37        VmValue::Set(x) => format!("set@{:p}", Arc::as_ptr(x)),
38        VmValue::Closure(x) => format!("closure@{:p}", Arc::as_ptr(x)),
39        VmValue::String(x) => format!("string@{:p}", x.as_ptr()),
40        VmValue::Bytes(x) => format!("bytes@{:p}", Arc::as_ptr(x)),
41        VmValue::BuiltinRef(name) => format!("builtin@{name}"),
42        VmValue::BuiltinRefId(r) => format!("builtin@{}", r.name),
43        other => format!("{}@{}", other.type_name(), other.display()),
44    }
45}
46
47/// Canonical string form used as the keying material for `hash_value`.
48/// Different types never collide (the type name is prepended) and collection
49/// order is preserved so structurally-equal values always produce the same
50/// key. Not intended for cross-process stability; depends on the in-process
51/// iteration order for collections (Dict uses BTreeMap so keys are sorted).
52pub fn value_structural_hash_key(v: &VmValue) -> String {
53    let mut out = String::new();
54    write_structural_hash_key(v, &mut out);
55    out
56}
57
58/// Writes the structural hash key for a value directly into `out`,
59/// avoiding intermediate allocations. Uses length-prefixed encoding
60/// for strings and dict keys to prevent separator collisions.
61fn write_structural_hash_key(v: &VmValue, out: &mut String) {
62    match v {
63        VmValue::Nil => out.push('N'),
64        VmValue::Bool(b) => {
65            out.push(if *b { 'T' } else { 'F' });
66        }
67        VmValue::Int(n) => write_int_hash_key(*n, out),
68        VmValue::Float(n) => {
69            // A float that is numerically an integer must hash identically to
70            // that integer, because `values_equal` treats `1 == 1.0` (and
71            // `0.0 == -0.0`) as equal — otherwise hash-keyed de-duplication
72            // would disagree with `==`. Non-integral / non-finite floats keep
73            // their bit pattern (so distinct NaNs may collide, which is fine:
74            // dedup confirms every hash hit with `values_equal`).
75            match float_as_equivalent_int(*n) {
76                Some(i) => write_int_hash_key(i, out),
77                None => {
78                    out.push('f');
79                    out.push_str(&n.to_bits().to_string());
80                    out.push(';');
81                }
82            }
83        }
84        VmValue::String(s) => {
85            // Length-prefixed: s<len>:<content> — no ambiguity from content
86            out.push('s');
87            write_len_prefixed(s, out);
88        }
89        VmValue::Bytes(bytes) => {
90            out.push('b');
91            for byte in bytes.iter() {
92                out.push_str(&format!("{byte:02x}"));
93            }
94            out.push(';');
95        }
96        VmValue::Duration(ms) => {
97            out.push('d');
98            out.push_str(&ms.to_string());
99            out.push(';');
100        }
101        // Normalize the scale so equal decimals hash alike (`1.5` and `1.50`
102        // are `values_equal`, so their keys must match). Decimal is its own
103        // namespace ('M') — it never collides with Int/Float keys, mirroring
104        // that `values_equal` treats Decimal as a distinct type from them.
105        VmValue::Decimal(dec) => {
106            out.push('M');
107            out.push_str(&dec.normalize().to_string());
108            out.push(';');
109        }
110        VmValue::List(items) => {
111            out.push('L');
112            guard_recursion(|| {
113                for item in items.iter() {
114                    write_structural_hash_key(item, out);
115                    out.push(',');
116                }
117            });
118            out.push(']');
119        }
120        VmValue::Dict(map) => {
121            out.push('D');
122            guard_recursion(|| {
123                for (k, v) in map.iter() {
124                    write_len_prefixed(k, out);
125                    out.push('=');
126                    write_structural_hash_key(v, out);
127                    out.push(',');
128                }
129            });
130            out.push('}');
131        }
132        VmValue::Set(set) => {
133            // Sets hash order-independently via their already-resident sorted
134            // structural keys.
135            out.push('S');
136            for k in set.sorted_keys() {
137                out.push_str(k);
138                out.push(',');
139            }
140            out.push('}');
141        }
142        // Composite values that can contain numbers recurse through this
143        // function (rather than the display fallback below) so numeric
144        // normalization propagates — e.g. `Some(1)` and `Some(1.0)`, which
145        // `values_equal` treats as equal, must hash alike.
146        VmValue::Pair(pair) => {
147            out.push('P');
148            guard_recursion(|| {
149                write_structural_hash_key(&pair.0, out);
150                out.push(',');
151                write_structural_hash_key(&pair.1, out);
152            });
153            out.push(';');
154        }
155        VmValue::EnumVariant(ev) => {
156            out.push('E');
157            write_len_prefixed(&ev.enum_name, out);
158            write_len_prefixed(&ev.variant, out);
159            guard_recursion(|| {
160                for field in ev.fields.iter() {
161                    write_structural_hash_key(field, out);
162                    out.push(',');
163                }
164            });
165            out.push(';');
166        }
167        VmValue::StructInstance(data) => {
168            let (layout, fields) = (&data.layout, &data.fields);
169            // Use the same name-keyed map `values_equal` compares, so field
170            // order in the layout never affects the key.
171            out.push('I');
172            write_len_prefixed(layout.struct_name(), out);
173            guard_recursion(|| {
174                for (k, v) in super::struct_fields_to_map(layout, fields) {
175                    write_len_prefixed(&k, out);
176                    out.push('=');
177                    write_structural_hash_key(&v, out);
178                    out.push(',');
179                }
180            });
181            out.push('}');
182        }
183        other => {
184            // Identity-only values (handles, channels, …) that `values_equal`
185            // never reports equal. Keyed by type + display purely so distinct
186            // ones rarely collide; dedup still confirms with `values_equal`.
187            out.push('o');
188            write_len_prefixed(other.type_name(), out);
189            write_len_prefixed(&other.display(), out);
190        }
191    }
192}
193
194/// Length-prefixed `<len>:<content>` encoding, so variable-length content can
195/// never be confused with surrounding structure (e.g. `"a,b"` vs `"a", "b"`).
196fn write_len_prefixed(s: &str, out: &mut String) {
197    out.push_str(&s.len().to_string());
198    out.push(':');
199    out.push_str(s);
200}
201
202/// Canonical integer hash-key encoding, shared by `Int` and by any `Float`
203/// that is numerically an integer (see [`float_as_equivalent_int`]).
204fn write_int_hash_key(n: i64, out: &mut String) {
205    out.push('i');
206    out.push_str(&n.to_string());
207    out.push(';');
208}
209
210/// Returns `Some(i)` when `n` compares equal to the `i64` value `i` under the
211/// same coercion `values_equal` uses for `Int`/`Float` (`(i as f64) == n`).
212/// `None` for non-integral or non-finite floats (incl. NaN / ±inf). This is
213/// the single source of truth that keeps the hash key consistent with `==`.
214fn float_as_equivalent_int(n: f64) -> Option<i64> {
215    let candidate = n as i64; // saturating, and NaN -> 0
216    ((candidate as f64) == n).then_some(candidate)
217}
218
219pub fn values_equal(a: &VmValue, b: &VmValue) -> bool {
220    match (a, b) {
221        (VmValue::Int(x), VmValue::Int(y)) => x == y,
222        (VmValue::Float(x), VmValue::Float(y)) => x == y,
223        // Decimal is a distinct type for equality (a clean island): it compares
224        // equal only to another Decimal, by value (rust_decimal's `==` ignores
225        // scale, so `1.5 == 1.50`). It is deliberately NOT cross-type-equal to
226        // Int/Float — that keeps equality transitive given the existing
227        // `Int == Float` coercion, and avoids silently equating exact money to
228        // lossy binary floats. Convert explicitly with `decimal(x)` to compare.
229        (VmValue::Decimal(x), VmValue::Decimal(y)) => x == y,
230        (VmValue::String(x), VmValue::String(y)) => x == y,
231        (VmValue::Bytes(x), VmValue::Bytes(y)) => x == y,
232        (VmValue::BuiltinRef(x), VmValue::BuiltinRef(y)) => x == y,
233        (VmValue::BuiltinRefId(x), VmValue::BuiltinRefId(y)) => x.name == y.name,
234        (VmValue::BuiltinRef(x), VmValue::BuiltinRefId(y))
235        | (VmValue::BuiltinRefId(y), VmValue::BuiltinRef(x)) => x.as_str() == y.name.as_str(),
236        (VmValue::Bool(x), VmValue::Bool(y)) => x == y,
237        (VmValue::Nil, VmValue::Nil) => true,
238        (VmValue::Int(x), VmValue::Float(y)) => (*x as f64) == *y,
239        (VmValue::Float(x), VmValue::Int(y)) => *x == (*y as f64),
240        (VmValue::TaskHandle(a), VmValue::TaskHandle(b)) => a == b,
241        (VmValue::Channel(_), VmValue::Channel(_)) => false, // channels are never equal
242        (VmValue::Rng(_), VmValue::Rng(_)) => false,
243        (VmValue::SyncPermit(_), VmValue::SyncPermit(_)) => false,
244        (VmValue::Atomic(a), VmValue::Atomic(b)) => {
245            a.value.load(Ordering::SeqCst) == b.value.load(Ordering::SeqCst)
246        }
247        (VmValue::List(a), VmValue::List(b)) => {
248            a.len() == b.len()
249                && guard_recursion(|| a.iter().zip(b.iter()).all(|(x, y)| values_equal(x, y)))
250        }
251        (VmValue::Dict(a), VmValue::Dict(b)) => {
252            a.len() == b.len()
253                && guard_recursion(|| {
254                    a.iter()
255                        .zip(b.iter())
256                        .all(|((k1, v1), (k2, v2))| k1 == k2 && values_equal(v1, v2))
257                })
258        }
259        (VmValue::EnumVariant(a), VmValue::EnumVariant(b)) => {
260            a.enum_name == b.enum_name
261                && a.variant == b.variant
262                && a.fields.len() == b.fields.len()
263                && guard_recursion(|| {
264                    a.fields
265                        .iter()
266                        .zip(b.fields.iter())
267                        .all(|(x, y)| values_equal(x, y))
268                })
269        }
270        (VmValue::StructInstance(a), VmValue::StructInstance(b)) => {
271            let (a_layout, a_fields) = (&a.layout, &a.fields);
272            let (b_layout, b_fields) = (&b.layout, &b.fields);
273            if a_layout.struct_name() != b_layout.struct_name() {
274                return false;
275            }
276            let a_map = super::struct_fields_to_map(a_layout, a_fields);
277            let b_map = super::struct_fields_to_map(b_layout, b_fields);
278            a_map.len() == b_map.len()
279                && guard_recursion(|| {
280                    a_map
281                        .iter()
282                        .zip(b_map.iter())
283                        .all(|((k1, v1), (k2, v2))| k1 == k2 && values_equal(v1, v2))
284                })
285        }
286        (VmValue::Set(a), VmValue::Set(b)) => {
287            // Order-independent: equal length plus every member of `a` present
288            // in `b`, using `b`'s O(1) structural-key index.
289            a.len() == b.len() && a.iter().all(|x| b.contains(x))
290        }
291        (VmValue::Generator(_), VmValue::Generator(_)) => false, // generators are never equal
292        (VmValue::Stream(_), VmValue::Stream(_)) => false,       // streams are never equal
293        (VmValue::Range(a), VmValue::Range(b)) => {
294            a.start == b.start && a.end == b.end && a.inclusive == b.inclusive
295        }
296        (VmValue::Iter(a), VmValue::Iter(b)) => Arc::ptr_eq(a, b),
297        (VmValue::Pair(a), VmValue::Pair(b)) => {
298            guard_recursion(|| values_equal(&a.0, &b.0) && values_equal(&a.1, &b.1))
299        }
300        // Harness handles carry runtime capability state, not values. Two
301        // handles that refer to the same backing capability are still
302        // observed-distinct because the script never compares them. Returning
303        // `false` matches `Channel` / `Generator` / `Stream` precedent.
304        (VmValue::Harness(_), VmValue::Harness(_)) => false,
305        _ => false,
306    }
307}
308
309/// Structural de-duplication that honors `values_equal`, preserving
310/// first-occurrence order.
311///
312/// Candidates are bucketed by [`value_structural_hash_key`] (so the common
313/// case stays near-O(n)), then every hash hit is confirmed with
314/// [`values_equal`]. Because the hash key is consistent with `values_equal`,
315/// numerically-equal values that hash alike (e.g. `1` and `1.0`) collapse to a
316/// single entry, while hash collisions between unequal values (e.g. two `NaN`s
317/// sharing a bit pattern) never merge — matching the `==` operator exactly.
318pub fn dedup_values<'a, I>(items: I) -> Vec<VmValue>
319where
320    I: IntoIterator<Item = &'a VmValue>,
321{
322    use std::collections::HashMap;
323    let mut buckets: HashMap<String, Vec<VmValue>> = HashMap::new();
324    let mut result = Vec::new();
325    for item in items {
326        let bucket = buckets.entry(value_structural_hash_key(item)).or_default();
327        if !bucket.iter().any(|kept| values_equal(kept, item)) {
328            bucket.push(item.clone());
329            result.push(item.clone());
330        }
331    }
332    result
333}
334
335/// Total-order comparison used for sorting, `min`/`max`, and similar reductions.
336///
337/// IEEE-754 NaN is *unordered*, so [`try_compare_values`] returns `None` for it;
338/// here we fall back to `0` (treat as equal) so a stray NaN does not destabilize
339/// a sort. Relational operators (`<`, `>`, `<=`, `>=`) must NOT use this fallback —
340/// they go through [`try_compare_values`] so that any comparison with NaN yields
341/// `false`, as the language spec and IEEE-754 require.
342pub fn compare_values(a: &VmValue, b: &VmValue) -> i32 {
343    try_compare_values(a, b).unwrap_or(0)
344}
345
346/// Ordered comparison for relational operators. Returns `None` when the two
347/// values are *unordered* — i.e. a floating-point NaN is involved (directly, via
348/// an int/float mix, or nested inside a pair). Callers implementing `<`, `>`,
349/// `<=`, `>=` must treat `None` as "comparison is false".
350pub fn try_compare_values(a: &VmValue, b: &VmValue) -> Option<i32> {
351    match (a, b) {
352        (VmValue::Int(x), VmValue::Int(y)) => Some(x.cmp(y) as i32),
353        (VmValue::Float(x), VmValue::Float(y)) => float_ordering(*x, *y),
354        (VmValue::Int(x), VmValue::Float(y)) => float_ordering(*x as f64, *y),
355        (VmValue::Float(x), VmValue::Int(y)) => float_ordering(*x, *y as f64),
356        // Decimal orders only against Decimal (the same island rule as
357        // `values_equal`); a Decimal-vs-Int/Float comparison is *unordered*
358        // (`None`) so relational operators yield false rather than coercing
359        // through a lossy float or the `Some(0)` "equal" fallback below. `cmp`
360        // is exact and scale-insensitive.
361        (VmValue::Decimal(x), VmValue::Decimal(y)) => Some(x.cmp(y) as i32),
362        (VmValue::Decimal(_), VmValue::Int(_) | VmValue::Float(_))
363        | (VmValue::Int(_) | VmValue::Float(_), VmValue::Decimal(_)) => None,
364        (VmValue::String(x), VmValue::String(y)) => Some(x.cmp(y) as i32),
365        (VmValue::Pair(x), VmValue::Pair(y)) => guard_recursion(|| {
366            let c = try_compare_values(&x.0, &y.0)?;
367            if c != 0 {
368                Some(c)
369            } else {
370                try_compare_values(&x.1, &y.1)
371            }
372        }),
373        _ => Some(0),
374    }
375}
376
377fn float_ordering(x: f64, y: f64) -> Option<i32> {
378    x.partial_cmp(&y).map(|ord| ord as i32)
379}