Skip to main content

haystack_core/graph/
value_index.rs

1// Value-level secondary indexes using B-Tree maps.
2//
3// Provides O(log N + result_size) range queries for comparison-based filters
4// (e.g. `temp > 72`, `area == 500`) instead of scanning all entities.
5
6use std::collections::{BTreeMap, HashMap};
7use std::ops::Bound;
8
9use crate::kinds::Kind;
10
11/// Orderable wrapper around a subset of Kind values that support comparison.
12///
13/// Only Number and Str are indexed (the most common comparison targets in
14/// Haystack filter expressions). Other kinds are silently skipped.
15#[derive(Debug, Clone)]
16enum OrderableKind {
17    Num(OrderedF64),
18    Str(String),
19}
20
21/// f64 wrapper with total ordering (NaN < everything, then normal f64 order).
22#[derive(Debug, Clone, Copy)]
23struct OrderedF64(f64);
24
25impl PartialEq for OrderedF64 {
26    fn eq(&self, other: &Self) -> bool {
27        self.0.total_cmp(&other.0) == std::cmp::Ordering::Equal
28    }
29}
30impl Eq for OrderedF64 {}
31
32impl PartialOrd for OrderedF64 {
33    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
34        Some(self.cmp(other))
35    }
36}
37impl Ord for OrderedF64 {
38    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
39        self.0.total_cmp(&other.0)
40    }
41}
42
43impl PartialEq for OrderableKind {
44    fn eq(&self, other: &Self) -> bool {
45        self.cmp(other) == std::cmp::Ordering::Equal
46    }
47}
48impl Eq for OrderableKind {}
49
50impl PartialOrd for OrderableKind {
51    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
52        Some(self.cmp(other))
53    }
54}
55
56impl Ord for OrderableKind {
57    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
58        match (self, other) {
59            (OrderableKind::Num(a), OrderableKind::Num(b)) => a.cmp(b),
60            (OrderableKind::Str(a), OrderableKind::Str(b)) => a.cmp(b),
61            // Numbers sort before strings for cross-type ordering.
62            (OrderableKind::Num(_), OrderableKind::Str(_)) => std::cmp::Ordering::Less,
63            (OrderableKind::Str(_), OrderableKind::Num(_)) => std::cmp::Ordering::Greater,
64        }
65    }
66}
67
68impl OrderableKind {
69    /// Try to convert a Kind value into an OrderableKind for indexing.
70    fn from_kind(kind: &Kind) -> Option<Self> {
71        match kind {
72            Kind::Number(n) => Some(OrderableKind::Num(OrderedF64(n.val))),
73            Kind::Str(s) => Some(OrderableKind::Str(s.clone())),
74            _ => None,
75        }
76    }
77}
78
79/// A collection of B-Tree indexes keyed by field name.
80///
81/// Each index maps orderable values to the set of entity IDs that have that
82/// value for the given field. Supports efficient range lookups.
83pub struct ValueIndex {
84    /// field_name → BTreeMap<value, Vec<entity_id>>
85    indexes: HashMap<String, BTreeMap<OrderableKind, Vec<usize>>>,
86}
87
88impl ValueIndex {
89    /// Create an empty value index.
90    pub fn new() -> Self {
91        Self {
92            indexes: HashMap::new(),
93        }
94    }
95
96    /// Register a field for indexing. Call this before adding entities.
97    pub fn index_field(&mut self, field: &str) {
98        self.indexes
99            .entry(field.to_string())
100            .or_insert_with(BTreeMap::new);
101    }
102
103    /// Returns the set of indexed field names.
104    pub fn indexed_fields(&self) -> impl Iterator<Item = &str> {
105        self.indexes.keys().map(|s| s.as_str())
106    }
107
108    /// Returns true if a given field has a value index.
109    pub fn has_index(&self, field: &str) -> bool {
110        self.indexes.contains_key(field)
111    }
112
113    /// Add an entity's value to the index for a given field.
114    pub fn add(&mut self, entity_id: usize, field: &str, value: &Kind) {
115        if let Some(tree) = self.indexes.get_mut(field)
116            && let Some(key) = OrderableKind::from_kind(value)
117        {
118            tree.entry(key).or_insert_with(Vec::new).push(entity_id);
119        }
120    }
121
122    /// Remove an entity from the index for a given field/value.
123    pub fn remove(&mut self, entity_id: usize, field: &str, value: &Kind) {
124        if let Some(tree) = self.indexes.get_mut(field)
125            && let Some(key) = OrderableKind::from_kind(value)
126        {
127            if let Some(ids) = tree.get_mut(&key) {
128                ids.retain(|&id| id != entity_id);
129                if ids.is_empty() {
130                    tree.remove(&key);
131                }
132            }
133        }
134    }
135
136    /// Look up entity IDs where field == val.
137    pub fn eq_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
138        let key = match OrderableKind::from_kind(val) {
139            Some(k) => k,
140            None => return Vec::new(),
141        };
142        self.indexes
143            .get(field)
144            .and_then(|tree| tree.get(&key))
145            .cloned()
146            .unwrap_or_default()
147    }
148
149    /// Look up entity IDs where field != val (all indexed minus exact match).
150    pub fn ne_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
151        let key = match OrderableKind::from_kind(val) {
152            Some(k) => k,
153            None => return Vec::new(),
154        };
155        let Some(tree) = self.indexes.get(field) else {
156            return Vec::new();
157        };
158        let mut result = Vec::new();
159        for (k, ids) in tree {
160            if k != &key {
161                result.extend(ids);
162            }
163        }
164        result
165    }
166
167    /// Look up entity IDs where field > val.
168    pub fn gt_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
169        let key = match OrderableKind::from_kind(val) {
170            Some(k) => k,
171            None => return Vec::new(),
172        };
173        let Some(tree) = self.indexes.get(field) else {
174            return Vec::new();
175        };
176        let mut result = Vec::new();
177        for (_, ids) in tree.range((Bound::Excluded(key), Bound::Unbounded)) {
178            result.extend(ids);
179        }
180        result
181    }
182
183    /// Look up entity IDs where field >= val.
184    pub fn ge_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
185        let key = match OrderableKind::from_kind(val) {
186            Some(k) => k,
187            None => return Vec::new(),
188        };
189        let Some(tree) = self.indexes.get(field) else {
190            return Vec::new();
191        };
192        let mut result = Vec::new();
193        for (_, ids) in tree.range((Bound::Included(key), Bound::Unbounded)) {
194            result.extend(ids);
195        }
196        result
197    }
198
199    /// Look up entity IDs where field < val.
200    pub fn lt_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
201        let key = match OrderableKind::from_kind(val) {
202            Some(k) => k,
203            None => return Vec::new(),
204        };
205        let Some(tree) = self.indexes.get(field) else {
206            return Vec::new();
207        };
208        let mut result = Vec::new();
209        for (_, ids) in tree.range((Bound::Unbounded, Bound::Excluded(key))) {
210            result.extend(ids);
211        }
212        result
213    }
214
215    /// Look up entity IDs where field <= val.
216    pub fn le_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
217        let key = match OrderableKind::from_kind(val) {
218            Some(k) => k,
219            None => return Vec::new(),
220        };
221        let Some(tree) = self.indexes.get(field) else {
222            return Vec::new();
223        };
224        let mut result = Vec::new();
225        for (_, ids) in tree.range((Bound::Unbounded, Bound::Included(key))) {
226            result.extend(ids);
227        }
228        result
229    }
230
231    /// Clear all indexes.
232    pub fn clear(&mut self) {
233        for tree in self.indexes.values_mut() {
234            tree.clear();
235        }
236    }
237}
238
239impl Default for ValueIndex {
240    fn default() -> Self {
241        Self::new()
242    }
243}
244
245#[cfg(test)]
246mod tests {
247    use super::*;
248    use crate::kinds::Number;
249
250    #[test]
251    fn eq_lookup_returns_matching_ids() {
252        let mut idx = ValueIndex::new();
253        idx.index_field("temp");
254        idx.add(0, "temp", &Kind::Number(Number::unitless(72.0)));
255        idx.add(1, "temp", &Kind::Number(Number::unitless(68.0)));
256        idx.add(2, "temp", &Kind::Number(Number::unitless(72.0)));
257
258        let result = idx.eq_lookup("temp", &Kind::Number(Number::unitless(72.0)));
259        assert_eq!(result, vec![0, 2]);
260    }
261
262    #[test]
263    fn gt_lookup_returns_greater_ids() {
264        let mut idx = ValueIndex::new();
265        idx.index_field("area");
266        idx.add(0, "area", &Kind::Number(Number::unitless(100.0)));
267        idx.add(1, "area", &Kind::Number(Number::unitless(500.0)));
268        idx.add(2, "area", &Kind::Number(Number::unitless(200.0)));
269        idx.add(3, "area", &Kind::Number(Number::unitless(50.0)));
270
271        let result = idx.gt_lookup("area", &Kind::Number(Number::unitless(150.0)));
272        assert!(result.contains(&2)); // 200
273        assert!(result.contains(&1)); // 500
274        assert!(!result.contains(&0)); // 100
275        assert!(!result.contains(&3)); // 50
276    }
277
278    #[test]
279    fn lt_lookup_returns_lesser_ids() {
280        let mut idx = ValueIndex::new();
281        idx.index_field("area");
282        idx.add(0, "area", &Kind::Number(Number::unitless(100.0)));
283        idx.add(1, "area", &Kind::Number(Number::unitless(500.0)));
284        idx.add(2, "area", &Kind::Number(Number::unitless(200.0)));
285
286        let result = idx.lt_lookup("area", &Kind::Number(Number::unitless(200.0)));
287        assert_eq!(result, vec![0]); // 100 < 200
288    }
289
290    #[test]
291    fn string_index_works() {
292        let mut idx = ValueIndex::new();
293        idx.index_field("dis");
294        idx.add(0, "dis", &Kind::Str("Alpha".to_string()));
295        idx.add(1, "dis", &Kind::Str("Beta".to_string()));
296        idx.add(2, "dis", &Kind::Str("Alpha".to_string()));
297
298        let result = idx.eq_lookup("dis", &Kind::Str("Alpha".to_string()));
299        assert_eq!(result, vec![0, 2]);
300    }
301
302    #[test]
303    fn unindexed_field_returns_empty() {
304        let idx = ValueIndex::new();
305        let result = idx.eq_lookup("temp", &Kind::Number(Number::unitless(72.0)));
306        assert!(result.is_empty());
307    }
308
309    #[test]
310    fn remove_entity_from_index() {
311        let mut idx = ValueIndex::new();
312        idx.index_field("temp");
313        idx.add(0, "temp", &Kind::Number(Number::unitless(72.0)));
314        idx.add(1, "temp", &Kind::Number(Number::unitless(72.0)));
315
316        idx.remove(0, "temp", &Kind::Number(Number::unitless(72.0)));
317
318        let result = idx.eq_lookup("temp", &Kind::Number(Number::unitless(72.0)));
319        assert_eq!(result, vec![1]);
320    }
321
322    #[test]
323    fn ne_lookup_excludes_matching() {
324        let mut idx = ValueIndex::new();
325        idx.index_field("status");
326        idx.add(0, "status", &Kind::Str("active".to_string()));
327        idx.add(1, "status", &Kind::Str("inactive".to_string()));
328        idx.add(2, "status", &Kind::Str("active".to_string()));
329
330        let result = idx.ne_lookup("status", &Kind::Str("active".to_string()));
331        assert_eq!(result, vec![1]);
332    }
333
334    #[test]
335    fn ge_and_le_lookups() {
336        let mut idx = ValueIndex::new();
337        idx.index_field("temp");
338        idx.add(0, "temp", &Kind::Number(Number::unitless(70.0)));
339        idx.add(1, "temp", &Kind::Number(Number::unitless(72.0)));
340        idx.add(2, "temp", &Kind::Number(Number::unitless(74.0)));
341
342        let ge = idx.ge_lookup("temp", &Kind::Number(Number::unitless(72.0)));
343        assert!(ge.contains(&1)); // 72
344        assert!(ge.contains(&2)); // 74
345        assert!(!ge.contains(&0)); // 70
346
347        let le = idx.le_lookup("temp", &Kind::Number(Number::unitless(72.0)));
348        assert!(le.contains(&0)); // 70
349        assert!(le.contains(&1)); // 72
350        assert!(!le.contains(&2)); // 74
351    }
352}