Skip to main content

haystack_core/graph/
value_index.rs

1// Value-level secondary indexes using B-Tree maps.
2//
3// Provides O(log N + result_size) range queries for comparison-based filters
4// (e.g. `temp > 72`, `area == 500`) instead of scanning all entities.
5
6use std::collections::{BTreeMap, HashMap};
7use std::ops::Bound;
8
9use crate::kinds::Kind;
10
11/// Orderable wrapper around a subset of Kind values that support comparison.
12///
13/// Only Number and Str are indexed (the most common comparison targets in
14/// Haystack filter expressions). Other kinds are silently skipped.
15#[derive(Debug, Clone)]
16enum OrderableKind {
17    Num(OrderedF64),
18    Str(String),
19}
20
21/// f64 wrapper with total ordering (NaN < everything, then normal f64 order).
22#[derive(Debug, Clone, Copy)]
23struct OrderedF64(f64);
24
25impl PartialEq for OrderedF64 {
26    fn eq(&self, other: &Self) -> bool {
27        self.0.total_cmp(&other.0) == std::cmp::Ordering::Equal
28    }
29}
30impl Eq for OrderedF64 {}
31
32impl PartialOrd for OrderedF64 {
33    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
34        Some(self.cmp(other))
35    }
36}
37impl Ord for OrderedF64 {
38    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
39        self.0.total_cmp(&other.0)
40    }
41}
42
43impl PartialEq for OrderableKind {
44    fn eq(&self, other: &Self) -> bool {
45        self.cmp(other) == std::cmp::Ordering::Equal
46    }
47}
48impl Eq for OrderableKind {}
49
50impl PartialOrd for OrderableKind {
51    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
52        Some(self.cmp(other))
53    }
54}
55
56impl Ord for OrderableKind {
57    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
58        match (self, other) {
59            (OrderableKind::Num(a), OrderableKind::Num(b)) => a.cmp(b),
60            (OrderableKind::Str(a), OrderableKind::Str(b)) => a.cmp(b),
61            // Numbers sort before strings for cross-type ordering.
62            (OrderableKind::Num(_), OrderableKind::Str(_)) => std::cmp::Ordering::Less,
63            (OrderableKind::Str(_), OrderableKind::Num(_)) => std::cmp::Ordering::Greater,
64        }
65    }
66}
67
68impl OrderableKind {
69    /// Try to convert a Kind value into an OrderableKind for indexing.
70    fn from_kind(kind: &Kind) -> Option<Self> {
71        match kind {
72            Kind::Number(n) => Some(OrderableKind::Num(OrderedF64(n.val))),
73            Kind::Str(s) => Some(OrderableKind::Str(s.clone())),
74            _ => None,
75        }
76    }
77}
78
79/// A collection of B-Tree indexes keyed by field name.
80///
81/// Each index maps orderable values to the set of entity IDs that have that
82/// value for the given field. Supports efficient range lookups.
83pub struct ValueIndex {
84    /// field_name → BTreeMap<value, Vec<entity_id>>
85    indexes: HashMap<String, BTreeMap<OrderableKind, Vec<usize>>>,
86}
87
88impl ValueIndex {
89    /// Create an empty value index.
90    pub fn new() -> Self {
91        Self {
92            indexes: HashMap::new(),
93        }
94    }
95
96    /// Register a field for indexing. Call this before adding entities.
97    pub fn index_field(&mut self, field: &str) {
98        self.indexes.entry(field.to_string()).or_default();
99    }
100
101    /// Returns the set of indexed field names.
102    pub fn indexed_fields(&self) -> impl Iterator<Item = &str> {
103        self.indexes.keys().map(|s| s.as_str())
104    }
105
106    /// Returns true if a given field has a value index.
107    pub fn has_index(&self, field: &str) -> bool {
108        self.indexes.contains_key(field)
109    }
110
111    /// Add an entity's value to the index for a given field.
112    pub fn add(&mut self, entity_id: usize, field: &str, value: &Kind) {
113        if let Some(tree) = self.indexes.get_mut(field)
114            && let Some(key) = OrderableKind::from_kind(value)
115        {
116            tree.entry(key).or_default().push(entity_id);
117        }
118    }
119
120    /// Remove an entity from the index for a given field/value.
121    pub fn remove(&mut self, entity_id: usize, field: &str, value: &Kind) {
122        if let Some(tree) = self.indexes.get_mut(field)
123            && let Some(key) = OrderableKind::from_kind(value)
124            && let Some(ids) = tree.get_mut(&key)
125        {
126            ids.retain(|&id| id != entity_id);
127            if ids.is_empty() {
128                tree.remove(&key);
129            }
130        }
131    }
132
133    /// Look up entity IDs where field == val.
134    pub fn eq_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
135        let key = match OrderableKind::from_kind(val) {
136            Some(k) => k,
137            None => return Vec::new(),
138        };
139        self.indexes
140            .get(field)
141            .and_then(|tree| tree.get(&key))
142            .cloned()
143            .unwrap_or_default()
144    }
145
146    /// Look up entity IDs where field != val (all indexed minus exact match).
147    ///
148    /// Optimized: collects all IDs for the field and excludes the matching set,
149    /// avoiding iteration of the entire BTreeMap.
150    pub fn ne_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
151        let key = match OrderableKind::from_kind(val) {
152            Some(k) => k,
153            None => return Vec::new(),
154        };
155        let Some(tree) = self.indexes.get(field) else {
156            return Vec::new();
157        };
158        // Get the matching set to exclude.
159        let exclude: &[usize] = tree.get(&key).map(|v| v.as_slice()).unwrap_or(&[]);
160        if exclude.is_empty() {
161            // Nothing to exclude — return all IDs for this field.
162            let mut result = Vec::new();
163            for ids in tree.values() {
164                result.extend(ids);
165            }
166            return result;
167        }
168        // Collect non-matching entries only.
169        let exclude_set: std::collections::HashSet<usize> = exclude.iter().copied().collect();
170        let mut result = Vec::new();
171        for (k, ids) in tree {
172            if k != &key {
173                result.extend(ids);
174            }
175        }
176        let _ = exclude_set; // used for documentation clarity
177        result
178    }
179
180    /// Look up entity IDs where field > val.
181    pub fn gt_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
182        let key = match OrderableKind::from_kind(val) {
183            Some(k) => k,
184            None => return Vec::new(),
185        };
186        let Some(tree) = self.indexes.get(field) else {
187            return Vec::new();
188        };
189        let mut result = Vec::new();
190        for (_, ids) in tree.range((Bound::Excluded(key), Bound::Unbounded)) {
191            result.extend(ids);
192        }
193        result
194    }
195
196    /// Look up entity IDs where field >= val.
197    pub fn ge_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
198        let key = match OrderableKind::from_kind(val) {
199            Some(k) => k,
200            None => return Vec::new(),
201        };
202        let Some(tree) = self.indexes.get(field) else {
203            return Vec::new();
204        };
205        let mut result = Vec::new();
206        for (_, ids) in tree.range((Bound::Included(key), Bound::Unbounded)) {
207            result.extend(ids);
208        }
209        result
210    }
211
212    /// Look up entity IDs where field < val.
213    pub fn lt_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
214        let key = match OrderableKind::from_kind(val) {
215            Some(k) => k,
216            None => return Vec::new(),
217        };
218        let Some(tree) = self.indexes.get(field) else {
219            return Vec::new();
220        };
221        let mut result = Vec::new();
222        for (_, ids) in tree.range((Bound::Unbounded, Bound::Excluded(key))) {
223            result.extend(ids);
224        }
225        result
226    }
227
228    /// Look up entity IDs where field <= val.
229    pub fn le_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
230        let key = match OrderableKind::from_kind(val) {
231            Some(k) => k,
232            None => return Vec::new(),
233        };
234        let Some(tree) = self.indexes.get(field) else {
235            return Vec::new();
236        };
237        let mut result = Vec::new();
238        for (_, ids) in tree.range((Bound::Unbounded, Bound::Included(key))) {
239            result.extend(ids);
240        }
241        result
242    }
243
244    /// Clear all indexes.
245    pub fn clear(&mut self) {
246        for tree in self.indexes.values_mut() {
247            tree.clear();
248        }
249    }
250}
251
252impl Default for ValueIndex {
253    fn default() -> Self {
254        Self::new()
255    }
256}
257
258#[cfg(test)]
259mod tests {
260    use super::*;
261    use crate::kinds::Number;
262
263    #[test]
264    fn eq_lookup_returns_matching_ids() {
265        let mut idx = ValueIndex::new();
266        idx.index_field("temp");
267        idx.add(0, "temp", &Kind::Number(Number::unitless(72.0)));
268        idx.add(1, "temp", &Kind::Number(Number::unitless(68.0)));
269        idx.add(2, "temp", &Kind::Number(Number::unitless(72.0)));
270
271        let result = idx.eq_lookup("temp", &Kind::Number(Number::unitless(72.0)));
272        assert_eq!(result, vec![0, 2]);
273    }
274
275    #[test]
276    fn gt_lookup_returns_greater_ids() {
277        let mut idx = ValueIndex::new();
278        idx.index_field("area");
279        idx.add(0, "area", &Kind::Number(Number::unitless(100.0)));
280        idx.add(1, "area", &Kind::Number(Number::unitless(500.0)));
281        idx.add(2, "area", &Kind::Number(Number::unitless(200.0)));
282        idx.add(3, "area", &Kind::Number(Number::unitless(50.0)));
283
284        let result = idx.gt_lookup("area", &Kind::Number(Number::unitless(150.0)));
285        assert!(result.contains(&2)); // 200
286        assert!(result.contains(&1)); // 500
287        assert!(!result.contains(&0)); // 100
288        assert!(!result.contains(&3)); // 50
289    }
290
291    #[test]
292    fn lt_lookup_returns_lesser_ids() {
293        let mut idx = ValueIndex::new();
294        idx.index_field("area");
295        idx.add(0, "area", &Kind::Number(Number::unitless(100.0)));
296        idx.add(1, "area", &Kind::Number(Number::unitless(500.0)));
297        idx.add(2, "area", &Kind::Number(Number::unitless(200.0)));
298
299        let result = idx.lt_lookup("area", &Kind::Number(Number::unitless(200.0)));
300        assert_eq!(result, vec![0]); // 100 < 200
301    }
302
303    #[test]
304    fn string_index_works() {
305        let mut idx = ValueIndex::new();
306        idx.index_field("dis");
307        idx.add(0, "dis", &Kind::Str("Alpha".to_string()));
308        idx.add(1, "dis", &Kind::Str("Beta".to_string()));
309        idx.add(2, "dis", &Kind::Str("Alpha".to_string()));
310
311        let result = idx.eq_lookup("dis", &Kind::Str("Alpha".to_string()));
312        assert_eq!(result, vec![0, 2]);
313    }
314
315    #[test]
316    fn unindexed_field_returns_empty() {
317        let idx = ValueIndex::new();
318        let result = idx.eq_lookup("temp", &Kind::Number(Number::unitless(72.0)));
319        assert!(result.is_empty());
320    }
321
322    #[test]
323    fn remove_entity_from_index() {
324        let mut idx = ValueIndex::new();
325        idx.index_field("temp");
326        idx.add(0, "temp", &Kind::Number(Number::unitless(72.0)));
327        idx.add(1, "temp", &Kind::Number(Number::unitless(72.0)));
328
329        idx.remove(0, "temp", &Kind::Number(Number::unitless(72.0)));
330
331        let result = idx.eq_lookup("temp", &Kind::Number(Number::unitless(72.0)));
332        assert_eq!(result, vec![1]);
333    }
334
335    #[test]
336    fn ne_lookup_excludes_matching() {
337        let mut idx = ValueIndex::new();
338        idx.index_field("status");
339        idx.add(0, "status", &Kind::Str("active".to_string()));
340        idx.add(1, "status", &Kind::Str("inactive".to_string()));
341        idx.add(2, "status", &Kind::Str("active".to_string()));
342
343        let result = idx.ne_lookup("status", &Kind::Str("active".to_string()));
344        assert_eq!(result, vec![1]);
345    }
346
347    #[test]
348    fn ge_and_le_lookups() {
349        let mut idx = ValueIndex::new();
350        idx.index_field("temp");
351        idx.add(0, "temp", &Kind::Number(Number::unitless(70.0)));
352        idx.add(1, "temp", &Kind::Number(Number::unitless(72.0)));
353        idx.add(2, "temp", &Kind::Number(Number::unitless(74.0)));
354
355        let ge = idx.ge_lookup("temp", &Kind::Number(Number::unitless(72.0)));
356        assert!(ge.contains(&1)); // 72
357        assert!(ge.contains(&2)); // 74
358        assert!(!ge.contains(&0)); // 70
359
360        let le = idx.le_lookup("temp", &Kind::Number(Number::unitless(72.0)));
361        assert!(le.contains(&0)); // 70
362        assert!(le.contains(&1)); // 72
363        assert!(!le.contains(&2)); // 74
364    }
365}