Skip to main content

harn_vm/value/
structural.rs

1use std::sync::atomic::Ordering;
2use std::sync::Arc;
3
4use super::VmValue;
5
6/// Reference / identity equality. For heap-allocated refcounted values
7/// (List/Dict/Set/Closure) returns true only when both operands share the
8/// same underlying shared allocation. For primitive scalars, falls back to
9/// structural equality (since primitives have no distinct identity).
10pub fn values_identical(a: &VmValue, b: &VmValue) -> bool {
11    match (a, b) {
12        (VmValue::List(x), VmValue::List(y)) => Arc::ptr_eq(x, y),
13        (VmValue::Dict(x), VmValue::Dict(y)) => Arc::ptr_eq(x, y),
14        (VmValue::Set(x), VmValue::Set(y)) => Arc::ptr_eq(x, y),
15        (VmValue::Closure(x), VmValue::Closure(y)) => Arc::ptr_eq(x, y),
16        (VmValue::String(x), VmValue::String(y)) => Arc::ptr_eq(x, y) || x == y,
17        (VmValue::Bytes(x), VmValue::Bytes(y)) => Arc::ptr_eq(x, y) || x == y,
18        (VmValue::BuiltinRef(x), VmValue::BuiltinRef(y)) => x == y,
19        (VmValue::BuiltinRefId { name: x, .. }, VmValue::BuiltinRefId { name: y, .. }) => x == y,
20        (VmValue::BuiltinRef(x), VmValue::BuiltinRefId { name: y, .. })
21        | (VmValue::BuiltinRefId { name: y, .. }, VmValue::BuiltinRef(x)) => x == y,
22        (VmValue::Pair(x), VmValue::Pair(y)) => Arc::ptr_eq(x, y),
23        // Primitives: identity collapses to structural equality.
24        _ => values_equal(a, b),
25    }
26}
27
28/// Stable identity key for a value. Different allocations produce different
29/// keys; two values with the same heap identity produce the same key. For
30/// primitives the key is derived from the displayed value plus type name so
31/// logically-equal primitives always compare equal.
32pub fn value_identity_key(v: &VmValue) -> String {
33    match v {
34        VmValue::List(x) => format!("list@{:p}", Arc::as_ptr(x)),
35        VmValue::Dict(x) => format!("dict@{:p}", Arc::as_ptr(x)),
36        VmValue::Set(x) => format!("set@{:p}", Arc::as_ptr(x)),
37        VmValue::Closure(x) => format!("closure@{:p}", Arc::as_ptr(x)),
38        VmValue::String(x) => format!("string@{:p}", x.as_ptr()),
39        VmValue::Bytes(x) => format!("bytes@{:p}", Arc::as_ptr(x)),
40        VmValue::BuiltinRef(name) => format!("builtin@{name}"),
41        VmValue::BuiltinRefId { name, .. } => format!("builtin@{name}"),
42        other => format!("{}@{}", other.type_name(), other.display()),
43    }
44}
45
46/// Canonical string form used as the keying material for `hash_value`.
47/// Different types never collide (the type name is prepended) and collection
48/// order is preserved so structurally-equal values always produce the same
49/// key. Not intended for cross-process stability; depends on the in-process
50/// iteration order for collections (Dict uses BTreeMap so keys are sorted).
51pub fn value_structural_hash_key(v: &VmValue) -> String {
52    let mut out = String::new();
53    write_structural_hash_key(v, &mut out);
54    out
55}
56
57/// Writes the structural hash key for a value directly into `out`,
58/// avoiding intermediate allocations. Uses length-prefixed encoding
59/// for strings and dict keys to prevent separator collisions.
60fn write_structural_hash_key(v: &VmValue, out: &mut String) {
61    match v {
62        VmValue::Nil => out.push('N'),
63        VmValue::Bool(b) => {
64            out.push(if *b { 'T' } else { 'F' });
65        }
66        VmValue::Int(n) => write_int_hash_key(*n, out),
67        VmValue::Float(n) => {
68            // A float that is numerically an integer must hash identically to
69            // that integer, because `values_equal` treats `1 == 1.0` (and
70            // `0.0 == -0.0`) as equal — otherwise hash-keyed de-duplication
71            // would disagree with `==`. Non-integral / non-finite floats keep
72            // their bit pattern (so distinct NaNs may collide, which is fine:
73            // dedup confirms every hash hit with `values_equal`).
74            match float_as_equivalent_int(*n) {
75                Some(i) => write_int_hash_key(i, out),
76                None => {
77                    out.push('f');
78                    out.push_str(&n.to_bits().to_string());
79                    out.push(';');
80                }
81            }
82        }
83        VmValue::String(s) => {
84            // Length-prefixed: s<len>:<content> — no ambiguity from content
85            out.push('s');
86            write_len_prefixed(s, out);
87        }
88        VmValue::Bytes(bytes) => {
89            out.push('b');
90            for byte in bytes.iter() {
91                out.push_str(&format!("{byte:02x}"));
92            }
93            out.push(';');
94        }
95        VmValue::Duration(ms) => {
96            out.push('d');
97            out.push_str(&ms.to_string());
98            out.push(';');
99        }
100        VmValue::List(items) => {
101            out.push('L');
102            for item in items.iter() {
103                write_structural_hash_key(item, out);
104                out.push(',');
105            }
106            out.push(']');
107        }
108        VmValue::Dict(map) => {
109            out.push('D');
110            for (k, v) in map.iter() {
111                write_len_prefixed(k, out);
112                out.push('=');
113                write_structural_hash_key(v, out);
114                out.push(',');
115            }
116            out.push('}');
117        }
118        VmValue::Set(items) => {
119            // Sets need sorted keys for order-independence
120            let mut keys: Vec<String> = items.iter().map(value_structural_hash_key).collect();
121            keys.sort();
122            out.push('S');
123            for k in &keys {
124                out.push_str(k);
125                out.push(',');
126            }
127            out.push('}');
128        }
129        // Composite values that can contain numbers recurse through this
130        // function (rather than the display fallback below) so numeric
131        // normalization propagates — e.g. `Some(1)` and `Some(1.0)`, which
132        // `values_equal` treats as equal, must hash alike.
133        VmValue::Pair(pair) => {
134            out.push('P');
135            write_structural_hash_key(&pair.0, out);
136            out.push(',');
137            write_structural_hash_key(&pair.1, out);
138            out.push(';');
139        }
140        VmValue::EnumVariant(ev) => {
141            out.push('E');
142            write_len_prefixed(&ev.enum_name, out);
143            write_len_prefixed(&ev.variant, out);
144            for field in ev.fields.iter() {
145                write_structural_hash_key(field, out);
146                out.push(',');
147            }
148            out.push(';');
149        }
150        VmValue::StructInstance { layout, fields } => {
151            // Use the same name-keyed map `values_equal` compares, so field
152            // order in the layout never affects the key.
153            out.push('I');
154            write_len_prefixed(layout.struct_name(), out);
155            for (k, v) in super::struct_fields_to_map(layout, fields) {
156                write_len_prefixed(&k, out);
157                out.push('=');
158                write_structural_hash_key(&v, out);
159                out.push(',');
160            }
161            out.push('}');
162        }
163        other => {
164            // Identity-only values (handles, channels, …) that `values_equal`
165            // never reports equal. Keyed by type + display purely so distinct
166            // ones rarely collide; dedup still confirms with `values_equal`.
167            out.push('o');
168            write_len_prefixed(other.type_name(), out);
169            write_len_prefixed(&other.display(), out);
170        }
171    }
172}
173
174/// Length-prefixed `<len>:<content>` encoding, so variable-length content can
175/// never be confused with surrounding structure (e.g. `"a,b"` vs `"a", "b"`).
176fn write_len_prefixed(s: &str, out: &mut String) {
177    out.push_str(&s.len().to_string());
178    out.push(':');
179    out.push_str(s);
180}
181
182/// Canonical integer hash-key encoding, shared by `Int` and by any `Float`
183/// that is numerically an integer (see [`float_as_equivalent_int`]).
184fn write_int_hash_key(n: i64, out: &mut String) {
185    out.push('i');
186    out.push_str(&n.to_string());
187    out.push(';');
188}
189
190/// Returns `Some(i)` when `n` compares equal to the `i64` value `i` under the
191/// same coercion `values_equal` uses for `Int`/`Float` (`(i as f64) == n`).
192/// `None` for non-integral or non-finite floats (incl. NaN / ±inf). This is
193/// the single source of truth that keeps the hash key consistent with `==`.
194fn float_as_equivalent_int(n: f64) -> Option<i64> {
195    let candidate = n as i64; // saturating, and NaN -> 0
196    ((candidate as f64) == n).then_some(candidate)
197}
198
199pub fn values_equal(a: &VmValue, b: &VmValue) -> bool {
200    match (a, b) {
201        (VmValue::Int(x), VmValue::Int(y)) => x == y,
202        (VmValue::Float(x), VmValue::Float(y)) => x == y,
203        (VmValue::String(x), VmValue::String(y)) => x == y,
204        (VmValue::Bytes(x), VmValue::Bytes(y)) => x == y,
205        (VmValue::BuiltinRef(x), VmValue::BuiltinRef(y)) => x == y,
206        (VmValue::BuiltinRefId { name: x, .. }, VmValue::BuiltinRefId { name: y, .. }) => x == y,
207        (VmValue::BuiltinRef(x), VmValue::BuiltinRefId { name: y, .. })
208        | (VmValue::BuiltinRefId { name: y, .. }, VmValue::BuiltinRef(x)) => x == y,
209        (VmValue::Bool(x), VmValue::Bool(y)) => x == y,
210        (VmValue::Nil, VmValue::Nil) => true,
211        (VmValue::Int(x), VmValue::Float(y)) => (*x as f64) == *y,
212        (VmValue::Float(x), VmValue::Int(y)) => *x == (*y as f64),
213        (VmValue::TaskHandle(a), VmValue::TaskHandle(b)) => a == b,
214        (VmValue::Channel(_), VmValue::Channel(_)) => false, // channels are never equal
215        (VmValue::Rng(_), VmValue::Rng(_)) => false,
216        (VmValue::SyncPermit(_), VmValue::SyncPermit(_)) => false,
217        (VmValue::Atomic(a), VmValue::Atomic(b)) => {
218            a.value.load(Ordering::SeqCst) == b.value.load(Ordering::SeqCst)
219        }
220        (VmValue::List(a), VmValue::List(b)) => {
221            a.len() == b.len() && a.iter().zip(b.iter()).all(|(x, y)| values_equal(x, y))
222        }
223        (VmValue::Dict(a), VmValue::Dict(b)) => {
224            a.len() == b.len()
225                && a.iter()
226                    .zip(b.iter())
227                    .all(|((k1, v1), (k2, v2))| k1 == k2 && values_equal(v1, v2))
228        }
229        (VmValue::EnumVariant(a), VmValue::EnumVariant(b)) => {
230            a.enum_name == b.enum_name
231                && a.variant == b.variant
232                && a.fields.len() == b.fields.len()
233                && a.fields
234                    .iter()
235                    .zip(b.fields.iter())
236                    .all(|(x, y)| values_equal(x, y))
237        }
238        (
239            VmValue::StructInstance {
240                layout: a_layout,
241                fields: a_fields,
242            },
243            VmValue::StructInstance {
244                layout: b_layout,
245                fields: b_fields,
246            },
247        ) => {
248            if a_layout.struct_name() != b_layout.struct_name() {
249                return false;
250            }
251            let a_map = super::struct_fields_to_map(a_layout, a_fields);
252            let b_map = super::struct_fields_to_map(b_layout, b_fields);
253            a_map.len() == b_map.len()
254                && a_map
255                    .iter()
256                    .zip(b_map.iter())
257                    .all(|((k1, v1), (k2, v2))| k1 == k2 && values_equal(v1, v2))
258        }
259        (VmValue::Set(a), VmValue::Set(b)) => {
260            a.len() == b.len() && a.iter().all(|x| b.iter().any(|y| values_equal(x, y)))
261        }
262        (VmValue::Generator(_), VmValue::Generator(_)) => false, // generators are never equal
263        (VmValue::Stream(_), VmValue::Stream(_)) => false,       // streams are never equal
264        (VmValue::Range(a), VmValue::Range(b)) => {
265            a.start == b.start && a.end == b.end && a.inclusive == b.inclusive
266        }
267        (VmValue::Iter(a), VmValue::Iter(b)) => Arc::ptr_eq(a, b),
268        (VmValue::Pair(a), VmValue::Pair(b)) => {
269            values_equal(&a.0, &b.0) && values_equal(&a.1, &b.1)
270        }
271        // Harness handles carry runtime capability state, not values. Two
272        // handles that refer to the same backing capability are still
273        // observed-distinct because the script never compares them. Returning
274        // `false` matches `Channel` / `Generator` / `Stream` precedent.
275        (VmValue::Harness(_), VmValue::Harness(_)) => false,
276        _ => false,
277    }
278}
279
280/// Structural de-duplication that honors `values_equal`, preserving
281/// first-occurrence order.
282///
283/// Candidates are bucketed by [`value_structural_hash_key`] (so the common
284/// case stays near-O(n)), then every hash hit is confirmed with
285/// [`values_equal`]. Because the hash key is consistent with `values_equal`,
286/// numerically-equal values that hash alike (e.g. `1` and `1.0`) collapse to a
287/// single entry, while hash collisions between unequal values (e.g. two `NaN`s
288/// sharing a bit pattern) never merge — matching the `==` operator exactly.
289pub fn dedup_values<'a, I>(items: I) -> Vec<VmValue>
290where
291    I: IntoIterator<Item = &'a VmValue>,
292{
293    use std::collections::HashMap;
294    let mut buckets: HashMap<String, Vec<VmValue>> = HashMap::new();
295    let mut result = Vec::new();
296    for item in items {
297        let bucket = buckets.entry(value_structural_hash_key(item)).or_default();
298        if !bucket.iter().any(|kept| values_equal(kept, item)) {
299            bucket.push(item.clone());
300            result.push(item.clone());
301        }
302    }
303    result
304}
305
306/// Total-order comparison used for sorting, `min`/`max`, and similar reductions.
307///
308/// IEEE-754 NaN is *unordered*, so [`try_compare_values`] returns `None` for it;
309/// here we fall back to `0` (treat as equal) so a stray NaN does not destabilize
310/// a sort. Relational operators (`<`, `>`, `<=`, `>=`) must NOT use this fallback —
311/// they go through [`try_compare_values`] so that any comparison with NaN yields
312/// `false`, as the language spec and IEEE-754 require.
313pub fn compare_values(a: &VmValue, b: &VmValue) -> i32 {
314    try_compare_values(a, b).unwrap_or(0)
315}
316
317/// Ordered comparison for relational operators. Returns `None` when the two
318/// values are *unordered* — i.e. a floating-point NaN is involved (directly, via
319/// an int/float mix, or nested inside a pair). Callers implementing `<`, `>`,
320/// `<=`, `>=` must treat `None` as "comparison is false".
321pub fn try_compare_values(a: &VmValue, b: &VmValue) -> Option<i32> {
322    match (a, b) {
323        (VmValue::Int(x), VmValue::Int(y)) => Some(x.cmp(y) as i32),
324        (VmValue::Float(x), VmValue::Float(y)) => float_ordering(*x, *y),
325        (VmValue::Int(x), VmValue::Float(y)) => float_ordering(*x as f64, *y),
326        (VmValue::Float(x), VmValue::Int(y)) => float_ordering(*x, *y as f64),
327        (VmValue::String(x), VmValue::String(y)) => Some(x.cmp(y) as i32),
328        (VmValue::Pair(x), VmValue::Pair(y)) => {
329            let c = try_compare_values(&x.0, &y.0)?;
330            if c != 0 {
331                Some(c)
332            } else {
333                try_compare_values(&x.1, &y.1)
334            }
335        }
336        _ => Some(0),
337    }
338}
339
340fn float_ordering(x: f64, y: f64) -> Option<i32> {
341    x.partial_cmp(&y).map(|ord| ord as i32)
342}