Skip to main content

harn_vm/value/
set.rs

1use std::collections::HashSet;
2use std::sync::Arc;
3
4use super::{value_structural_hash_key, VmValue};
5
6/// Backing store for [`VmValue::Set`](super::VmValue::Set).
7///
8/// A Harn set is an *ordered* collection that deduplicates by structural
9/// equality. Membership used to be answered by rebuilding a
10/// `HashSet<String>` of structural hash keys from the backing `Vec` on every
11/// `contains` / `union` / `intersect` / … call — O(n) work (plus an
12/// allocation) per query. `VmSet` keeps that index resident alongside the
13/// items, so membership is O(1) and the set-algebra builtins drop from
14/// rebuild-per-call to a single key computation per probe.
15///
16/// Invariants:
17/// * `keys` holds exactly one entry per item: `value_structural_hash_key(item)`.
18/// * Insertion order is preserved in `items` (iteration, display, and
19///   serialization are order-stable); equality and hashing stay
20///   order-independent, matching the language spec.
21#[derive(Debug, Clone, Default)]
22pub struct VmSet {
23    /// Items in insertion order. Stored behind an `Arc` so iteration can share
24    /// the backing buffer with a `VmIter`/`IterState` without copying, and
25    /// value-semantic mutation clones copy-on-write via `Arc::make_mut`.
26    items: Arc<Vec<VmValue>>,
27    keys: HashSet<String>,
28}
29
30impl VmSet {
31    pub fn new() -> Self {
32        Self::default()
33    }
34
35    pub fn with_capacity(capacity: usize) -> Self {
36        Self {
37            items: Arc::new(Vec::with_capacity(capacity)),
38            keys: HashSet::with_capacity(capacity),
39        }
40    }
41
42    /// Insert `value`, returning `true` when it was newly added (its structural
43    /// key was not already present).
44    pub fn insert(&mut self, value: VmValue) -> bool {
45        let key = value_structural_hash_key(&value);
46        if self.keys.insert(key) {
47            Arc::make_mut(&mut self.items).push(value);
48            true
49        } else {
50            false
51        }
52    }
53
54    /// O(1) membership test by structural equality.
55    pub fn contains(&self, value: &VmValue) -> bool {
56        self.keys.contains(&value_structural_hash_key(value))
57    }
58
59    /// Remove `value` if present, returning `true` when it was removed.
60    pub fn remove(&mut self, value: &VmValue) -> bool {
61        let key = value_structural_hash_key(value);
62        if self.keys.remove(&key) {
63            Arc::make_mut(&mut self.items).retain(|item| value_structural_hash_key(item) != key);
64            true
65        } else {
66            false
67        }
68    }
69
70    pub fn len(&self) -> usize {
71        self.items.len()
72    }
73
74    pub fn is_empty(&self) -> bool {
75        self.items.is_empty()
76    }
77
78    pub fn iter(&self) -> std::slice::Iter<'_, VmValue> {
79        self.items.iter()
80    }
81
82    pub fn items(&self) -> &[VmValue] {
83        self.items.as_slice()
84    }
85
86    /// The shared backing buffer, for iterators that walk a set without
87    /// copying its elements (a refcount bump rather than an O(n) clone).
88    pub fn shared_items(&self) -> Arc<Vec<VmValue>> {
89        Arc::clone(&self.items)
90    }
91
92    /// Consume the set, yielding its items in insertion order. Used by the
93    /// iterative teardown path so deeply nested sets drop without recursing.
94    pub fn into_items(self) -> Vec<VmValue> {
95        Arc::try_unwrap(self.items).unwrap_or_else(|items| (*items).clone())
96    }
97
98    /// `self ∪ other`, preserving `self`'s order then `other`'s new items.
99    pub fn union(&self, other: &VmSet) -> VmSet {
100        let mut out = self.clone();
101        for value in other.iter() {
102            out.insert(value.clone());
103        }
104        out
105    }
106
107    /// `self ∩ other` — items of `self` also present in `other`.
108    pub fn intersect(&self, other: &VmSet) -> VmSet {
109        self.items
110            .iter()
111            .filter(|item| other.contains(item))
112            .cloned()
113            .collect()
114    }
115
116    /// `self \ other` — items of `self` not present in `other`.
117    pub fn difference(&self, other: &VmSet) -> VmSet {
118        self.items
119            .iter()
120            .filter(|item| !other.contains(item))
121            .cloned()
122            .collect()
123    }
124
125    /// `self △ other` — items in exactly one of the two sets.
126    pub fn symmetric_difference(&self, other: &VmSet) -> VmSet {
127        let mut out: VmSet = self
128            .iter()
129            .filter(|item| !other.contains(item))
130            .cloned()
131            .collect();
132        for value in other.iter() {
133            if !self.contains(value) {
134                out.insert(value.clone());
135            }
136        }
137        out
138    }
139
140    pub fn is_subset(&self, other: &VmSet) -> bool {
141        self.items.iter().all(|item| other.contains(item))
142    }
143
144    pub fn is_superset(&self, other: &VmSet) -> bool {
145        other.is_subset(self)
146    }
147
148    pub fn is_disjoint(&self, other: &VmSet) -> bool {
149        !self.items.iter().any(|item| other.contains(item))
150    }
151
152    /// Structural keys in sorted order. Backs the order-independent set hash
153    /// in [`value_structural_hash_key`](super::value_structural_hash_key)
154    /// without re-deriving a key per element.
155    pub fn sorted_keys(&self) -> Vec<&str> {
156        let mut keys: Vec<&str> = self.keys.iter().map(String::as_str).collect();
157        keys.sort_unstable();
158        keys
159    }
160}
161
162impl FromIterator<VmValue> for VmSet {
163    fn from_iter<I: IntoIterator<Item = VmValue>>(iter: I) -> Self {
164        let iter = iter.into_iter();
165        let mut set = VmSet::with_capacity(iter.size_hint().0);
166        for value in iter {
167            set.insert(value);
168        }
169        set
170    }
171}
172
173impl<'a> IntoIterator for &'a VmSet {
174    type Item = &'a VmValue;
175    type IntoIter = std::slice::Iter<'a, VmValue>;
176
177    fn into_iter(self) -> Self::IntoIter {
178        self.iter()
179    }
180}
181
182#[cfg(test)]
183mod tests {
184    use super::*;
185    use crate::value::{value_structural_hash_key, values_equal};
186
187    fn int_set(values: &[i64]) -> VmSet {
188        values.iter().map(|n| VmValue::Int(*n)).collect()
189    }
190
191    #[test]
192    fn insert_dedups_and_preserves_first_seen_order() {
193        let set = int_set(&[3, 1, 3, 2, 1]);
194        assert_eq!(set.len(), 3);
195        let order: Vec<i64> = set
196            .iter()
197            .map(|v| match v {
198                VmValue::Int(n) => *n,
199                _ => unreachable!(),
200            })
201            .collect();
202        assert_eq!(order, vec![3, 1, 2]);
203    }
204
205    #[test]
206    fn contains_is_structural() {
207        let set = int_set(&[1, 2, 3]);
208        assert!(set.contains(&VmValue::Int(2)));
209        assert!(!set.contains(&VmValue::Int(9)));
210        // `1` and `1.0` are structurally equal in Harn.
211        assert!(set.contains(&VmValue::Float(1.0)));
212    }
213
214    #[test]
215    fn remove_updates_index_and_order() {
216        let mut set = int_set(&[1, 2, 3]);
217        assert!(set.remove(&VmValue::Int(2)));
218        assert!(!set.remove(&VmValue::Int(2)));
219        assert!(!set.contains(&VmValue::Int(2)));
220        assert_eq!(set.len(), 2);
221    }
222
223    #[test]
224    fn set_algebra() {
225        let a = int_set(&[1, 2, 3]);
226        let b = int_set(&[2, 3, 4]);
227        assert_eq!(a.union(&b).len(), 4);
228        assert_eq!(a.intersect(&b).len(), 2);
229        assert!(a.intersect(&b).contains(&VmValue::Int(2)));
230        let diff = a.difference(&b);
231        assert_eq!(diff.len(), 1);
232        assert!(diff.contains(&VmValue::Int(1)));
233        let sym = a.symmetric_difference(&b);
234        assert_eq!(sym.len(), 2);
235        assert!(sym.contains(&VmValue::Int(1)) && sym.contains(&VmValue::Int(4)));
236        assert!(int_set(&[1, 2]).is_subset(&a));
237        assert!(a.is_superset(&int_set(&[1, 2])));
238        assert!(a.is_disjoint(&int_set(&[7, 8])));
239        assert!(!a.is_disjoint(&b));
240    }
241
242    #[test]
243    fn equality_and_hash_are_order_independent() {
244        let a = VmValue::set([VmValue::Int(1), VmValue::Int(2), VmValue::Int(3)]);
245        let b = VmValue::set([VmValue::Int(3), VmValue::Int(2), VmValue::Int(1)]);
246        assert!(values_equal(&a, &b));
247        assert_eq!(value_structural_hash_key(&a), value_structural_hash_key(&b));
248        let c = VmValue::set([VmValue::Int(1), VmValue::Int(2)]);
249        assert!(!values_equal(&a, &c));
250    }
251}