Skip to main content

haystack_core/graph/
value_index.rs

1// Value-level secondary indexes using B-Tree maps.
2//
3// Provides O(log N + result_size) range queries for comparison-based filters
4// (e.g. `temp > 72`, `area == 500`) instead of scanning all entities.
5
6use std::collections::{BTreeMap, HashMap};
7use std::ops::Bound;
8
9use crate::kinds::Kind;
10
/// Orderable wrapper around a subset of Kind values that support comparison.
///
/// Only Number and Str are indexed (the most common comparison targets in
/// Haystack filter expressions). Other kinds are silently skipped: the
/// conversion from `Kind` yields `None` for them, so they never become keys.
///
/// Equality and ordering are implemented by hand rather than derived; in the
/// resulting total order every `Num` key sorts before every `Str` key.
///
/// NOTE(review): a numeric key is built from the number's `val` alone, so a
/// unit carried by the number (if any) is presumably not part of the key —
/// confirm that unit-insensitive matching is intended.
#[derive(Debug, Clone)]
enum OrderableKind {
    /// A numeric value, wrapped so that it has a total order.
    Num(OrderedF64),
    /// An owned copy of a string value.
    Str(String),
}
20
/// f64 wrapper with total ordering (NaN < everything, then normal f64 order).
///
/// "Normal f64 order" is the IEEE-754 comparison, so `-0.0` and `0.0` are the
/// same key, exactly as `-0.0 == 0.0` holds for plain floats. All NaN values,
/// whatever their sign or payload, compare equal to each other and sort below
/// every non-NaN value (including negative infinity). This makes the type a
/// valid `Ord` key for a B-Tree while keeping numeric lookups consistent with
/// ordinary float comparison.
#[derive(Debug, Clone, Copy)]
struct OrderedF64(f64);

impl PartialEq for OrderedF64 {
    /// Two values are equal exactly when [`Ord::cmp`] reports `Equal`, which
    /// keeps `Eq` and `Ord` consistent as B-Tree keys require.
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other).is_eq()
    }
}
impl Eq for OrderedF64 {}

impl PartialOrd for OrderedF64 {
    /// Always `Some`: the ordering is total.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for OrderedF64 {
    /// Total order: NaN first, then ascending IEEE-754 numeric order.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        use std::cmp::Ordering;

        match self.0.partial_cmp(&other.0) {
            // Both operands are ordinary numbers (or infinities); the IEEE
            // comparison already treats -0.0 and 0.0 as equal.
            Some(ordering) => ordering,
            // `partial_cmp` only fails when at least one operand is NaN.
            None => match (self.0.is_nan(), other.0.is_nan()) {
                (true, true) => Ordering::Equal,
                (true, false) => Ordering::Less,
                _ => Ordering::Greater,
            },
        }
    }
}
42
43impl PartialEq for OrderableKind {
44    fn eq(&self, other: &Self) -> bool {
45        self.cmp(other) == std::cmp::Ordering::Equal
46    }
47}
48impl Eq for OrderableKind {}
49
50impl PartialOrd for OrderableKind {
51    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
52        Some(self.cmp(other))
53    }
54}
55
56impl Ord for OrderableKind {
57    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
58        match (self, other) {
59            (OrderableKind::Num(a), OrderableKind::Num(b)) => a.cmp(b),
60            (OrderableKind::Str(a), OrderableKind::Str(b)) => a.cmp(b),
61            // Numbers sort before strings for cross-type ordering.
62            (OrderableKind::Num(_), OrderableKind::Str(_)) => std::cmp::Ordering::Less,
63            (OrderableKind::Str(_), OrderableKind::Num(_)) => std::cmp::Ordering::Greater,
64        }
65    }
66}
67
68impl OrderableKind {
69    /// Try to convert a Kind value into an OrderableKind for indexing.
70    fn from_kind(kind: &Kind) -> Option<Self> {
71        match kind {
72            Kind::Number(n) => Some(OrderableKind::Num(OrderedF64(n.val))),
73            Kind::Str(s) => Some(OrderableKind::Str(s.clone())),
74            _ => None,
75        }
76    }
77}
78
/// A collection of B-Tree indexes keyed by field name.
///
/// Each index maps orderable values to the list of entity IDs that were added
/// with that value for the given field. Supports efficient range lookups.
///
/// The ID lists are plain vectors kept in insertion order; adding the same
/// entity twice under the same value stores it twice. A value whose list
/// becomes empty on removal is dropped from the tree.
pub struct ValueIndex {
    /// field_name → BTreeMap<value, Vec<entity_id>>
    ///
    /// A field is present as a key here once it has been registered, even if
    /// no entity has been added for it yet.
    indexes: HashMap<String, BTreeMap<OrderableKind, Vec<usize>>>,
}
87
88impl ValueIndex {
89    /// Create an empty value index.
90    pub fn new() -> Self {
91        Self {
92            indexes: HashMap::new(),
93        }
94    }
95
96    /// Register a field for indexing. Call this before adding entities.
97    pub fn index_field(&mut self, field: &str) {
98        self.indexes.entry(field.to_string()).or_default();
99    }
100
101    /// Returns the set of indexed field names.
102    pub fn indexed_fields(&self) -> impl Iterator<Item = &str> {
103        self.indexes.keys().map(|s| s.as_str())
104    }
105
106    /// Returns true if a given field has a value index.
107    pub fn has_index(&self, field: &str) -> bool {
108        self.indexes.contains_key(field)
109    }
110
111    /// Add an entity's value to the index for a given field.
112    pub fn add(&mut self, entity_id: usize, field: &str, value: &Kind) {
113        if let Some(tree) = self.indexes.get_mut(field)
114            && let Some(key) = OrderableKind::from_kind(value)
115        {
116            tree.entry(key).or_default().push(entity_id);
117        }
118    }
119
120    /// Remove an entity from the index for a given field/value.
121    pub fn remove(&mut self, entity_id: usize, field: &str, value: &Kind) {
122        if let Some(tree) = self.indexes.get_mut(field)
123            && let Some(key) = OrderableKind::from_kind(value)
124            && let Some(ids) = tree.get_mut(&key)
125        {
126            ids.retain(|&id| id != entity_id);
127            if ids.is_empty() {
128                tree.remove(&key);
129            }
130        }
131    }
132
133    /// Look up entity IDs where field == val.
134    pub fn eq_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
135        let key = match OrderableKind::from_kind(val) {
136            Some(k) => k,
137            None => return Vec::new(),
138        };
139        self.indexes
140            .get(field)
141            .and_then(|tree| tree.get(&key))
142            .cloned()
143            .unwrap_or_default()
144    }
145
146    /// Look up entity IDs where field != val (all indexed minus exact match).
147    pub fn ne_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
148        let key = match OrderableKind::from_kind(val) {
149            Some(k) => k,
150            None => return Vec::new(),
151        };
152        let Some(tree) = self.indexes.get(field) else {
153            return Vec::new();
154        };
155        let mut result = Vec::new();
156        for (k, ids) in tree {
157            if k != &key {
158                result.extend(ids);
159            }
160        }
161        result
162    }
163
164    /// Look up entity IDs where field > val.
165    pub fn gt_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
166        let key = match OrderableKind::from_kind(val) {
167            Some(k) => k,
168            None => return Vec::new(),
169        };
170        let Some(tree) = self.indexes.get(field) else {
171            return Vec::new();
172        };
173        let mut result = Vec::new();
174        for (_, ids) in tree.range((Bound::Excluded(key), Bound::Unbounded)) {
175            result.extend(ids);
176        }
177        result
178    }
179
180    /// Look up entity IDs where field >= val.
181    pub fn ge_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
182        let key = match OrderableKind::from_kind(val) {
183            Some(k) => k,
184            None => return Vec::new(),
185        };
186        let Some(tree) = self.indexes.get(field) else {
187            return Vec::new();
188        };
189        let mut result = Vec::new();
190        for (_, ids) in tree.range((Bound::Included(key), Bound::Unbounded)) {
191            result.extend(ids);
192        }
193        result
194    }
195
196    /// Look up entity IDs where field < val.
197    pub fn lt_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
198        let key = match OrderableKind::from_kind(val) {
199            Some(k) => k,
200            None => return Vec::new(),
201        };
202        let Some(tree) = self.indexes.get(field) else {
203            return Vec::new();
204        };
205        let mut result = Vec::new();
206        for (_, ids) in tree.range((Bound::Unbounded, Bound::Excluded(key))) {
207            result.extend(ids);
208        }
209        result
210    }
211
212    /// Look up entity IDs where field <= val.
213    pub fn le_lookup(&self, field: &str, val: &Kind) -> Vec<usize> {
214        let key = match OrderableKind::from_kind(val) {
215            Some(k) => k,
216            None => return Vec::new(),
217        };
218        let Some(tree) = self.indexes.get(field) else {
219            return Vec::new();
220        };
221        let mut result = Vec::new();
222        for (_, ids) in tree.range((Bound::Unbounded, Bound::Included(key))) {
223            result.extend(ids);
224        }
225        result
226    }
227
228    /// Clear all indexes.
229    pub fn clear(&mut self) {
230        for tree in self.indexes.values_mut() {
231            tree.clear();
232        }
233    }
234}
235
236impl Default for ValueIndex {
237    fn default() -> Self {
238        Self::new()
239    }
240}
241
#[cfg(test)]
mod tests {
    use super::*;
    use crate::kinds::Number;

    /// Shorthand for a unitless numeric `Kind`.
    fn num(value: f64) -> Kind {
        Kind::Number(Number::unitless(value))
    }

    /// Shorthand for a string `Kind`.
    fn text(value: &str) -> Kind {
        Kind::Str(value.to_string())
    }

    /// Build an index over one registered `field`, giving the values entity
    /// IDs 0, 1, 2, … in the order listed.
    fn index_of(field: &str, values: &[Kind]) -> ValueIndex {
        let mut idx = ValueIndex::new();
        idx.index_field(field);
        for (entity_id, value) in values.iter().enumerate() {
            idx.add(entity_id, field, value);
        }
        idx
    }

    #[test]
    fn eq_lookup_returns_matching_ids() {
        let idx = index_of("temp", &[num(72.0), num(68.0), num(72.0)]);

        assert_eq!(idx.eq_lookup("temp", &num(72.0)), vec![0, 2]);
    }

    #[test]
    fn gt_lookup_returns_greater_ids() {
        let idx = index_of("area", &[num(100.0), num(500.0), num(200.0), num(50.0)]);

        let above = idx.gt_lookup("area", &num(150.0));
        assert!(above.contains(&2)); // 200
        assert!(above.contains(&1)); // 500
        assert!(!above.contains(&0)); // 100
        assert!(!above.contains(&3)); // 50
    }

    #[test]
    fn lt_lookup_returns_lesser_ids() {
        let idx = index_of("area", &[num(100.0), num(500.0), num(200.0)]);

        // Only 100 is strictly below 200.
        assert_eq!(idx.lt_lookup("area", &num(200.0)), vec![0]);
    }

    #[test]
    fn string_index_works() {
        let idx = index_of("dis", &[text("Alpha"), text("Beta"), text("Alpha")]);

        assert_eq!(idx.eq_lookup("dis", &text("Alpha")), vec![0, 2]);
    }

    #[test]
    fn unindexed_field_returns_empty() {
        let idx = ValueIndex::new();

        assert!(idx.eq_lookup("temp", &num(72.0)).is_empty());
    }

    #[test]
    fn remove_entity_from_index() {
        let mut idx = index_of("temp", &[num(72.0), num(72.0)]);

        idx.remove(0, "temp", &num(72.0));

        assert_eq!(idx.eq_lookup("temp", &num(72.0)), vec![1]);
    }

    #[test]
    fn ne_lookup_excludes_matching() {
        let idx = index_of(
            "status",
            &[text("active"), text("inactive"), text("active")],
        );

        assert_eq!(idx.ne_lookup("status", &text("active")), vec![1]);
    }

    #[test]
    fn ge_and_le_lookups() {
        let idx = index_of("temp", &[num(70.0), num(72.0), num(74.0)]);
        let pivot = num(72.0);

        let at_least = idx.ge_lookup("temp", &pivot);
        assert!(at_least.contains(&1)); // 72
        assert!(at_least.contains(&2)); // 74
        assert!(!at_least.contains(&0)); // 70

        let at_most = idx.le_lookup("temp", &pivot);
        assert!(at_most.contains(&0)); // 70
        assert!(at_most.contains(&1)); // 72
        assert!(!at_most.contains(&2)); // 74
    }
}
349}