Skip to main content

haystack_core/graph/
value_index.rs

1// Value-level secondary indexes using B-Tree maps.
2//
3// Provides O(log N + result_size) range queries for comparison-based filters
4// (e.g. `temp > 72`, `area == 500`) instead of scanning all entities.
5
6use std::collections::{BTreeMap, HashMap};
7use std::ops::Bound;
8
9use crate::kinds::Kind;
10
11/// Orderable wrapper around a subset of Kind values that support comparison.
12///
13/// Only Number and Str are indexed (the most common comparison targets in
14/// Haystack filter expressions). Other kinds are silently skipped.
15#[derive(Debug, Clone)]
16enum OrderableKind {
17    Num(OrderedF64),
18    Str(String),
19}
20
21/// f64 wrapper with total ordering (NaN < everything, then normal f64 order).
22#[derive(Debug, Clone, Copy)]
23struct OrderedF64(f64);
24
25impl PartialEq for OrderedF64 {
26    fn eq(&self, other: &Self) -> bool {
27        self.0.total_cmp(&other.0) == std::cmp::Ordering::Equal
28    }
29}
30impl Eq for OrderedF64 {}
31
32impl PartialOrd for OrderedF64 {
33    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
34        Some(self.cmp(other))
35    }
36}
37impl Ord for OrderedF64 {
38    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
39        self.0.total_cmp(&other.0)
40    }
41}
42
43impl PartialEq for OrderableKind {
44    fn eq(&self, other: &Self) -> bool {
45        self.cmp(other) == std::cmp::Ordering::Equal
46    }
47}
48impl Eq for OrderableKind {}
49
50impl PartialOrd for OrderableKind {
51    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
52        Some(self.cmp(other))
53    }
54}
55
56impl Ord for OrderableKind {
57    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
58        match (self, other) {
59            (OrderableKind::Num(a), OrderableKind::Num(b)) => a.cmp(b),
60            (OrderableKind::Str(a), OrderableKind::Str(b)) => a.cmp(b),
61            // Numbers sort before strings for cross-type ordering.
62            (OrderableKind::Num(_), OrderableKind::Str(_)) => std::cmp::Ordering::Less,
63            (OrderableKind::Str(_), OrderableKind::Num(_)) => std::cmp::Ordering::Greater,
64        }
65    }
66}
67
68impl OrderableKind {
69    /// Try to convert a Kind value into an OrderableKind for indexing.
70    fn from_kind(kind: &Kind) -> Option<Self> {
71        match kind {
72            Kind::Number(n) => Some(OrderableKind::Num(OrderedF64(n.val))),
73            Kind::Str(s) => Some(OrderableKind::Str(s.clone())),
74            _ => None,
75        }
76    }
77}
78
79/// A collection of B-Tree indexes keyed by field name.
80///
81/// Each index maps orderable values to the set of entity IDs that have that
82/// value for the given field. Supports efficient range lookups.
83pub struct ValueIndex {
84    /// field_name → BTreeMap<value, Vec<entity_id>>
85    indexes: HashMap<String, BTreeMap<OrderableKind, Vec<usize>>>,
86}
87
88impl ValueIndex {
89    /// Create an empty value index.
90    pub fn new() -> Self {
91        Self {
92            indexes: HashMap::new(),
93        }
94    }
95
96    /// Register a field for indexing. Call this before adding entities.
97    pub fn index_field(&mut self, field: &str) {
98        self.indexes.entry(field.to_string()).or_default();
99    }
100
101    /// Returns the set of indexed field names.
102    pub fn indexed_fields(&self) -> impl Iterator<Item = &str> {
103        self.indexes.keys().map(|s| s.as_str())
104    }
105
106    /// Returns true if a given field has a value index.
107    pub fn has_index(&self, field: &str) -> bool {
108        self.indexes.contains_key(field)
109    }
110
111    /// Add an entity's value to the index for a given field.
112    pub fn add(&mut self, entity_id: usize, field: &str, value: &Kind) {
113        if let Some(tree) = self.indexes.get_mut(field)
114            && let Some(key) = OrderableKind::from_kind(value)
115        {
116            tree.entry(key).or_default().push(entity_id);
117        }
118    }
119
120    /// Remove an entity from the index for a given field/value.
121    pub fn remove(&mut self, entity_id: usize, field: &str, value: &Kind) {
122        if let Some(tree) = self.indexes.get_mut(field)
123            && let Some(key) = OrderableKind::from_kind(value)
124            && let Some(ids) = tree.get_mut(&key)
125        {
126            ids.retain(|&id| id != entity_id);
127            if ids.is_empty() {
128                tree.remove(&key);
129            }
130        }
131    }
132
133    /// Look up entity IDs where field == val.
134    pub fn eq_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
135        let key = match OrderableKind::from_kind(val) {
136            Some(k) => k,
137            None => return Vec::new(),
138        };
139        self.indexes
140            .get(field)
141            .and_then(|tree| tree.get(&key))
142            .cloned()
143            .unwrap_or_default()
144    }
145
146    /// Look up entity IDs where field != val (all indexed minus exact match).
147    ///
148    /// Optimized: collects all IDs for the field and excludes the matching set,
149    /// avoiding iteration of the entire BTreeMap.
150    pub fn ne_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
151        let key = match OrderableKind::from_kind(val) {
152            Some(k) => k,
153            None => return Vec::new(),
154        };
155        let Some(tree) = self.indexes.get(field) else {
156            return Vec::new();
157        };
158        // Get the matching set to exclude.
159        let exclude: &[usize] = tree.get(&key).map(|v| v.as_slice()).unwrap_or(&[]);
160        if exclude.is_empty() {
161            // Nothing to exclude — return all IDs for this field.
162            let mut result = Vec::new();
163            for ids in tree.values() {
164                result.extend(ids);
165            }
166            return result;
167        }
168        // Collect non-matching entries only.
169        let mut result = Vec::new();
170        for (k, ids) in tree {
171            if k != &key {
172                result.extend(ids);
173            }
174        }
175        result
176    }
177
178    /// Look up entity IDs where field > val.
179    pub fn gt_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
180        let key = match OrderableKind::from_kind(val) {
181            Some(k) => k,
182            None => return Vec::new(),
183        };
184        let Some(tree) = self.indexes.get(field) else {
185            return Vec::new();
186        };
187        let mut result = Vec::new();
188        for (_, ids) in tree.range((Bound::Excluded(key), Bound::Unbounded)) {
189            result.extend(ids);
190        }
191        result
192    }
193
194    /// Look up entity IDs where field >= val.
195    pub fn ge_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
196        let key = match OrderableKind::from_kind(val) {
197            Some(k) => k,
198            None => return Vec::new(),
199        };
200        let Some(tree) = self.indexes.get(field) else {
201            return Vec::new();
202        };
203        let mut result = Vec::new();
204        for (_, ids) in tree.range((Bound::Included(key), Bound::Unbounded)) {
205            result.extend(ids);
206        }
207        result
208    }
209
210    /// Look up entity IDs where field < val.
211    pub fn lt_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
212        let key = match OrderableKind::from_kind(val) {
213            Some(k) => k,
214            None => return Vec::new(),
215        };
216        let Some(tree) = self.indexes.get(field) else {
217            return Vec::new();
218        };
219        let mut result = Vec::new();
220        for (_, ids) in tree.range((Bound::Unbounded, Bound::Excluded(key))) {
221            result.extend(ids);
222        }
223        result
224    }
225
226    /// Look up entity IDs where field <= val.
227    pub fn le_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
228        let key = match OrderableKind::from_kind(val) {
229            Some(k) => k,
230            None => return Vec::new(),
231        };
232        let Some(tree) = self.indexes.get(field) else {
233            return Vec::new();
234        };
235        let mut result = Vec::new();
236        for (_, ids) in tree.range((Bound::Unbounded, Bound::Included(key))) {
237            result.extend(ids);
238        }
239        result
240    }
241
242    /// Clear all indexes.
243    pub fn clear(&mut self) {
244        for tree in self.indexes.values_mut() {
245            tree.clear();
246        }
247    }
248}
249
250impl Default for ValueIndex {
251    fn default() -> Self {
252        Self::new()
253    }
254}
255
256#[cfg(test)]
257mod tests {
258    use super::*;
259    use crate::kinds::Number;
260
261    #[test]
262    fn eq_lookup_returns_matching_ids() {
263        let mut idx = ValueIndex::new();
264        idx.index_field("temp");
265        idx.add(0, "temp", &Kind::Number(Number::unitless(72.0)));
266        idx.add(1, "temp", &Kind::Number(Number::unitless(68.0)));
267        idx.add(2, "temp", &Kind::Number(Number::unitless(72.0)));
268
269        let result = idx.eq_lookup("temp", &Kind::Number(Number::unitless(72.0)));
270        assert_eq!(result, vec![0, 2]);
271    }
272
273    #[test]
274    fn gt_lookup_returns_greater_ids() {
275        let mut idx = ValueIndex::new();
276        idx.index_field("area");
277        idx.add(0, "area", &Kind::Number(Number::unitless(100.0)));
278        idx.add(1, "area", &Kind::Number(Number::unitless(500.0)));
279        idx.add(2, "area", &Kind::Number(Number::unitless(200.0)));
280        idx.add(3, "area", &Kind::Number(Number::unitless(50.0)));
281
282        let result = idx.gt_lookup("area", &Kind::Number(Number::unitless(150.0)));
283        assert!(result.contains(&2)); // 200
284        assert!(result.contains(&1)); // 500
285        assert!(!result.contains(&0)); // 100
286        assert!(!result.contains(&3)); // 50
287    }
288
289    #[test]
290    fn lt_lookup_returns_lesser_ids() {
291        let mut idx = ValueIndex::new();
292        idx.index_field("area");
293        idx.add(0, "area", &Kind::Number(Number::unitless(100.0)));
294        idx.add(1, "area", &Kind::Number(Number::unitless(500.0)));
295        idx.add(2, "area", &Kind::Number(Number::unitless(200.0)));
296
297        let result = idx.lt_lookup("area", &Kind::Number(Number::unitless(200.0)));
298        assert_eq!(result, vec![0]); // 100 < 200
299    }
300
301    #[test]
302    fn string_index_works() {
303        let mut idx = ValueIndex::new();
304        idx.index_field("dis");
305        idx.add(0, "dis", &Kind::Str("Alpha".to_string()));
306        idx.add(1, "dis", &Kind::Str("Beta".to_string()));
307        idx.add(2, "dis", &Kind::Str("Alpha".to_string()));
308
309        let result = idx.eq_lookup("dis", &Kind::Str("Alpha".to_string()));
310        assert_eq!(result, vec![0, 2]);
311    }
312
313    #[test]
314    fn unindexed_field_returns_empty() {
315        let idx = ValueIndex::new();
316        let result = idx.eq_lookup("temp", &Kind::Number(Number::unitless(72.0)));
317        assert!(result.is_empty());
318    }
319
320    #[test]
321    fn remove_entity_from_index() {
322        let mut idx = ValueIndex::new();
323        idx.index_field("temp");
324        idx.add(0, "temp", &Kind::Number(Number::unitless(72.0)));
325        idx.add(1, "temp", &Kind::Number(Number::unitless(72.0)));
326
327        idx.remove(0, "temp", &Kind::Number(Number::unitless(72.0)));
328
329        let result = idx.eq_lookup("temp", &Kind::Number(Number::unitless(72.0)));
330        assert_eq!(result, vec![1]);
331    }
332
333    #[test]
334    fn ne_lookup_excludes_matching() {
335        let mut idx = ValueIndex::new();
336        idx.index_field("status");
337        idx.add(0, "status", &Kind::Str("active".to_string()));
338        idx.add(1, "status", &Kind::Str("inactive".to_string()));
339        idx.add(2, "status", &Kind::Str("active".to_string()));
340
341        let result = idx.ne_lookup("status", &Kind::Str("active".to_string()));
342        assert_eq!(result, vec![1]);
343    }
344
345    #[test]
346    fn ge_and_le_lookups() {
347        let mut idx = ValueIndex::new();
348        idx.index_field("temp");
349        idx.add(0, "temp", &Kind::Number(Number::unitless(70.0)));
350        idx.add(1, "temp", &Kind::Number(Number::unitless(72.0)));
351        idx.add(2, "temp", &Kind::Number(Number::unitless(74.0)));
352
353        let ge = idx.ge_lookup("temp", &Kind::Number(Number::unitless(72.0)));
354        assert!(ge.contains(&1)); // 72
355        assert!(ge.contains(&2)); // 74
356        assert!(!ge.contains(&0)); // 70
357
358        let le = idx.le_lookup("temp", &Kind::Number(Number::unitless(72.0)));
359        assert!(le.contains(&0)); // 70
360        assert!(le.contains(&1)); // 72
361        assert!(!le.contains(&2)); // 74
362    }
363}