Skip to main content

mdql_core/
index.rs

1//! B-tree indexes on frontmatter fields for fast filtered queries.
2
3use std::collections::{BTreeMap, HashMap, HashSet};
4
5use crate::model::{Row, Value};
6use crate::schema::Schema;
7
8/// Index for a single table's frontmatter fields.
9/// Each indexed field maps values → set of file paths.
10#[derive(Debug)]
11pub struct TableIndex {
12    indexes: HashMap<String, BTreeMap<IndexKey, Vec<String>>>,
13}
14
15/// Wrapper around Value that implements Ord for use as BTreeMap key.
16#[derive(Debug, Clone, PartialEq, Eq)]
17pub enum IndexKey {
18    String(String),
19    Int(i64),
20    Bool(bool),
21    Date(chrono::NaiveDate),
22}
23
24impl PartialOrd for IndexKey {
25    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
26        Some(self.cmp(other))
27    }
28}
29
30impl Ord for IndexKey {
31    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
32        match (self, other) {
33            (IndexKey::Int(a), IndexKey::Int(b)) => a.cmp(b),
34            (IndexKey::String(a), IndexKey::String(b)) => a.cmp(b),
35            (IndexKey::Bool(a), IndexKey::Bool(b)) => a.cmp(b),
36            (IndexKey::Date(a), IndexKey::Date(b)) => a.cmp(b),
37            // Cross-type: order by variant index
38            _ => self.variant_order().cmp(&other.variant_order()),
39        }
40    }
41}
42
43impl IndexKey {
44    fn variant_order(&self) -> u8 {
45        match self {
46            IndexKey::Bool(_) => 0,
47            IndexKey::Int(_) => 1,
48            IndexKey::String(_) => 2,
49            IndexKey::Date(_) => 3,
50        }
51    }
52}
53
54fn value_to_key(v: &Value) -> Option<IndexKey> {
55    match v {
56        Value::String(s) => Some(IndexKey::String(s.clone())),
57        Value::Int(n) => Some(IndexKey::Int(*n)),
58        Value::Bool(b) => Some(IndexKey::Bool(*b)),
59        Value::Date(d) => Some(IndexKey::Date(*d)),
60        Value::Float(f) => {
61            // Store floats as their bit representation for ordering
62            Some(IndexKey::Int(f.to_bits() as i64))
63        }
64        Value::Null | Value::List(_) => None,
65    }
66}
67
68impl TableIndex {
69    /// Build indexes for all frontmatter fields.
70    pub fn build(rows: &[Row], schema: &Schema) -> Self {
71        let mut indexes: HashMap<String, BTreeMap<IndexKey, Vec<String>>> = HashMap::new();
72
73        // Create an index for each frontmatter field
74        for field_name in schema.frontmatter.keys() {
75            indexes.insert(field_name.clone(), BTreeMap::new());
76        }
77
78        for row in rows {
79            let path = match row.get("path") {
80                Some(Value::String(p)) => p.clone(),
81                _ => continue,
82            };
83
84            for field_name in schema.frontmatter.keys() {
85                if let Some(val) = row.get(field_name) {
86                    if let Some(key) = value_to_key(val) {
87                        indexes
88                            .entry(field_name.clone())
89                            .or_default()
90                            .entry(key)
91                            .or_default()
92                            .push(path.clone());
93                    }
94                }
95            }
96        }
97
98        TableIndex { indexes }
99    }
100
101    /// Lookup rows with exact value match on a field.
102    pub fn lookup_eq(&self, field: &str, value: &Value) -> Vec<&str> {
103        let key = match value_to_key(value) {
104            Some(k) => k,
105            None => return Vec::new(),
106        };
107
108        self.indexes
109            .get(field)
110            .and_then(|btree| btree.get(&key))
111            .map(|paths| paths.iter().map(|s| s.as_str()).collect())
112            .unwrap_or_default()
113    }
114
115    /// Lookup rows within a range on a field (inclusive bounds).
116    pub fn lookup_range(
117        &self,
118        field: &str,
119        min: Option<&Value>,
120        max: Option<&Value>,
121    ) -> Vec<&str> {
122        let btree = match self.indexes.get(field) {
123            Some(bt) => bt,
124            None => return Vec::new(),
125        };
126
127        let min_key = min.and_then(value_to_key);
128        let max_key = max.and_then(value_to_key);
129
130        let mut result = Vec::new();
131
132        use std::ops::Bound;
133        let range_start = match &min_key {
134            Some(k) => Bound::Included(k),
135            None => Bound::Unbounded,
136        };
137        let range_end = match &max_key {
138            Some(k) => Bound::Included(k),
139            None => Bound::Unbounded,
140        };
141
142        for (_key, paths) in btree.range((range_start, range_end)) {
143            for p in paths {
144                result.push(p.as_str());
145            }
146        }
147
148        result
149    }
150
151    /// Lookup rows matching any value in a set (for IN clauses).
152    pub fn lookup_in(&self, field: &str, values: &[Value]) -> Vec<&str> {
153        let mut seen = HashSet::new();
154        let mut result = Vec::new();
155
156        for val in values {
157            for path in self.lookup_eq(field, val) {
158                if seen.insert(path) {
159                    result.push(path);
160                }
161            }
162        }
163
164        result
165    }
166
167    /// Remove all entries for a given path (used on UPDATE/DELETE).
168    pub fn invalidate(&mut self, path: &str) {
169        for btree in self.indexes.values_mut() {
170            for paths in btree.values_mut() {
171                paths.retain(|p| p != path);
172            }
173        }
174    }
175
176    /// Add entries for a new/updated row.
177    pub fn update(&mut self, path: &str, row: &Row, schema: &Schema) {
178        // First remove old entries
179        self.invalidate(path);
180
181        // Then add new entries
182        for field_name in schema.frontmatter.keys() {
183            if let Some(val) = row.get(field_name) {
184                if let Some(key) = value_to_key(val) {
185                    self.indexes
186                        .entry(field_name.clone())
187                        .or_default()
188                        .entry(key)
189                        .or_default()
190                        .push(path.to_string());
191                }
192            }
193        }
194    }
195
196    /// Check if a field is indexed.
197    pub fn has_index(&self, field: &str) -> bool {
198        self.indexes.contains_key(field)
199    }
200
201    /// Get the set of indexed fields.
202    pub fn indexed_fields(&self) -> Vec<&str> {
203        self.indexes.keys().map(|s| s.as_str()).collect()
204    }
205}
206
207#[cfg(test)]
208mod tests {
209    use super::*;
210    use crate::schema::*;
211    use indexmap::IndexMap;
212
213    fn test_schema() -> Schema {
214        let mut frontmatter = IndexMap::new();
215        frontmatter.insert(
216            "status".to_string(),
217            FieldDef {
218                field_type: FieldType::String,
219                required: true,
220                enum_values: Some(vec!["ACTIVE".into(), "KILLED".into()]),
221            },
222        );
223        frontmatter.insert(
224            "score".to_string(),
225            FieldDef {
226                field_type: FieldType::Int,
227                required: false,
228                enum_values: None,
229            },
230        );
231        Schema {
232            table: "test".into(),
233            primary_key: "path".into(),
234            frontmatter,
235            h1_required: false,
236            h1_must_equal_frontmatter: None,
237            sections: IndexMap::new(),
238            rules: Rules {
239                reject_unknown_frontmatter: false,
240                reject_unknown_sections: false,
241                reject_duplicate_sections: true,
242                normalize_numbered_headings: false,
243            },
244        }
245    }
246
247    fn test_rows() -> Vec<Row> {
248        vec![
249            Row::from([
250                ("path".into(), Value::String("a.md".into())),
251                ("status".into(), Value::String("ACTIVE".into())),
252                ("score".into(), Value::Int(10)),
253            ]),
254            Row::from([
255                ("path".into(), Value::String("b.md".into())),
256                ("status".into(), Value::String("KILLED".into())),
257                ("score".into(), Value::Int(5)),
258            ]),
259            Row::from([
260                ("path".into(), Value::String("c.md".into())),
261                ("status".into(), Value::String("ACTIVE".into())),
262                ("score".into(), Value::Int(20)),
263            ]),
264        ]
265    }
266
267    #[test]
268    fn test_build_and_lookup_eq() {
269        let idx = TableIndex::build(&test_rows(), &test_schema());
270        let active = idx.lookup_eq("status", &Value::String("ACTIVE".into()));
271        assert_eq!(active.len(), 2);
272        assert!(active.contains(&"a.md"));
273        assert!(active.contains(&"c.md"));
274
275        let killed = idx.lookup_eq("status", &Value::String("KILLED".into()));
276        assert_eq!(killed.len(), 1);
277        assert_eq!(killed[0], "b.md");
278    }
279
280    #[test]
281    fn test_lookup_range() {
282        let idx = TableIndex::build(&test_rows(), &test_schema());
283        let range = idx.lookup_range(
284            "score",
285            Some(&Value::Int(5)),
286            Some(&Value::Int(10)),
287        );
288        assert_eq!(range.len(), 2);
289    }
290
291    #[test]
292    fn test_lookup_in() {
293        let idx = TableIndex::build(&test_rows(), &test_schema());
294        let result = idx.lookup_in(
295            "score",
296            &[Value::Int(5), Value::Int(20)],
297        );
298        assert_eq!(result.len(), 2);
299    }
300
301    #[test]
302    fn test_invalidate_and_update() {
303        let schema = test_schema();
304        let mut idx = TableIndex::build(&test_rows(), &schema);
305
306        // Invalidate b.md
307        idx.invalidate("b.md");
308        let killed = idx.lookup_eq("status", &Value::String("KILLED".into()));
309        assert_eq!(killed.len(), 0);
310
311        // Re-add with updated status
312        let new_row = Row::from([
313            ("path".into(), Value::String("b.md".into())),
314            ("status".into(), Value::String("ACTIVE".into())),
315            ("score".into(), Value::Int(15)),
316        ]);
317        idx.update("b.md", &new_row, &schema);
318        let active = idx.lookup_eq("status", &Value::String("ACTIVE".into()));
319        assert_eq!(active.len(), 3);
320    }
321}