Skip to main content

mdql_core/
index.rs

1//! B-tree indexes on frontmatter fields for fast filtered queries.
2
3use std::collections::{BTreeMap, HashMap, HashSet};
4
5use crate::model::{Row, Value};
6use crate::schema::Schema;
7
8/// Index for a single table's frontmatter fields.
9/// Each indexed field maps values → set of file paths.
10#[derive(Debug)]
11pub struct TableIndex {
12    indexes: HashMap<String, BTreeMap<IndexKey, Vec<String>>>,
13}
14
15/// Wrapper around Value that implements Ord for use as BTreeMap key.
16#[derive(Debug, Clone, PartialEq, Eq)]
17pub enum IndexKey {
18    String(String),
19    Int(i64),
20    Bool(bool),
21    Date(chrono::NaiveDate),
22    DateTime(chrono::NaiveDateTime),
23}
24
25impl PartialOrd for IndexKey {
26    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
27        Some(self.cmp(other))
28    }
29}
30
31impl Ord for IndexKey {
32    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
33        match (self, other) {
34            (IndexKey::Int(a), IndexKey::Int(b)) => a.cmp(b),
35            (IndexKey::String(a), IndexKey::String(b)) => a.cmp(b),
36            (IndexKey::Bool(a), IndexKey::Bool(b)) => a.cmp(b),
37            (IndexKey::Date(a), IndexKey::Date(b)) => a.cmp(b),
38            (IndexKey::DateTime(a), IndexKey::DateTime(b)) => a.cmp(b),
39            // Cross-type: order by variant index
40            _ => self.variant_order().cmp(&other.variant_order()),
41        }
42    }
43}
44
45impl IndexKey {
46    fn variant_order(&self) -> u8 {
47        match self {
48            IndexKey::Bool(_) => 0,
49            IndexKey::Int(_) => 1,
50            IndexKey::String(_) => 2,
51            IndexKey::Date(_) => 3,
52            IndexKey::DateTime(_) => 4,
53        }
54    }
55}
56
57fn value_to_key(v: &Value) -> Option<IndexKey> {
58    match v {
59        Value::String(s) => Some(IndexKey::String(s.clone())),
60        Value::Int(n) => Some(IndexKey::Int(*n)),
61        Value::Bool(b) => Some(IndexKey::Bool(*b)),
62        Value::Date(d) => Some(IndexKey::Date(*d)),
63        Value::DateTime(dt) => Some(IndexKey::DateTime(*dt)),
64        Value::Float(f) => {
65            // Store floats as their bit representation for ordering
66            Some(IndexKey::Int(f.to_bits() as i64))
67        }
68        Value::Null | Value::List(_) => None,
69    }
70}
71
72impl TableIndex {
73    /// Build indexes for all frontmatter fields.
74    pub fn build(rows: &[Row], schema: &Schema) -> Self {
75        let mut indexes: HashMap<String, BTreeMap<IndexKey, Vec<String>>> = HashMap::new();
76
77        // Create an index for each frontmatter field
78        for field_name in schema.frontmatter.keys() {
79            indexes.insert(field_name.clone(), BTreeMap::new());
80        }
81
82        for row in rows {
83            let path = match row.get("path") {
84                Some(Value::String(p)) => p.clone(),
85                _ => continue,
86            };
87
88            for field_name in schema.frontmatter.keys() {
89                if let Some(val) = row.get(field_name) {
90                    if let Some(key) = value_to_key(val) {
91                        indexes
92                            .entry(field_name.clone())
93                            .or_default()
94                            .entry(key)
95                            .or_default()
96                            .push(path.clone());
97                    }
98                }
99            }
100        }
101
102        TableIndex { indexes }
103    }
104
105    /// Lookup rows with exact value match on a field.
106    pub fn lookup_eq(&self, field: &str, value: &Value) -> Vec<&str> {
107        let key = match value_to_key(value) {
108            Some(k) => k,
109            None => return Vec::new(),
110        };
111
112        self.indexes
113            .get(field)
114            .and_then(|btree| btree.get(&key))
115            .map(|paths| paths.iter().map(|s| s.as_str()).collect())
116            .unwrap_or_default()
117    }
118
119    /// Lookup rows within a range on a field (inclusive bounds).
120    pub fn lookup_range(
121        &self,
122        field: &str,
123        min: Option<&Value>,
124        max: Option<&Value>,
125    ) -> Vec<&str> {
126        let btree = match self.indexes.get(field) {
127            Some(bt) => bt,
128            None => return Vec::new(),
129        };
130
131        let min_key = min.and_then(value_to_key);
132        let max_key = max.and_then(value_to_key);
133
134        let mut result = Vec::new();
135
136        use std::ops::Bound;
137        let range_start = match &min_key {
138            Some(k) => Bound::Included(k),
139            None => Bound::Unbounded,
140        };
141        let range_end = match &max_key {
142            Some(k) => Bound::Included(k),
143            None => Bound::Unbounded,
144        };
145
146        for (_key, paths) in btree.range((range_start, range_end)) {
147            for p in paths {
148                result.push(p.as_str());
149            }
150        }
151
152        result
153    }
154
155    /// Lookup rows matching any value in a set (for IN clauses).
156    pub fn lookup_in(&self, field: &str, values: &[Value]) -> Vec<&str> {
157        let mut seen = HashSet::new();
158        let mut result = Vec::new();
159
160        for val in values {
161            for path in self.lookup_eq(field, val) {
162                if seen.insert(path) {
163                    result.push(path);
164                }
165            }
166        }
167
168        result
169    }
170
171    /// Remove all entries for a given path (used on UPDATE/DELETE).
172    pub fn invalidate(&mut self, path: &str) {
173        for btree in self.indexes.values_mut() {
174            for paths in btree.values_mut() {
175                paths.retain(|p| p != path);
176            }
177        }
178    }
179
180    /// Add entries for a new/updated row.
181    pub fn update(&mut self, path: &str, row: &Row, schema: &Schema) {
182        // First remove old entries
183        self.invalidate(path);
184
185        // Then add new entries
186        for field_name in schema.frontmatter.keys() {
187            if let Some(val) = row.get(field_name) {
188                if let Some(key) = value_to_key(val) {
189                    self.indexes
190                        .entry(field_name.clone())
191                        .or_default()
192                        .entry(key)
193                        .or_default()
194                        .push(path.to_string());
195                }
196            }
197        }
198    }
199
200    /// Check if a field is indexed.
201    pub fn has_index(&self, field: &str) -> bool {
202        self.indexes.contains_key(field)
203    }
204
205    /// Get the set of indexed fields.
206    pub fn indexed_fields(&self) -> Vec<&str> {
207        self.indexes.keys().map(|s| s.as_str()).collect()
208    }
209}
210
211#[cfg(test)]
212mod tests {
213    use super::*;
214    use crate::schema::*;
215    use indexmap::IndexMap;
216
217    fn test_schema() -> Schema {
218        let mut frontmatter = IndexMap::new();
219        frontmatter.insert(
220            "status".to_string(),
221            FieldDef {
222                field_type: FieldType::String,
223                required: true,
224                enum_values: Some(vec!["ACTIVE".into(), "KILLED".into()]),
225            },
226        );
227        frontmatter.insert(
228            "score".to_string(),
229            FieldDef {
230                field_type: FieldType::Int,
231                required: false,
232                enum_values: None,
233            },
234        );
235        Schema {
236            table: "test".into(),
237            primary_key: "path".into(),
238            frontmatter,
239            h1_required: false,
240            h1_must_equal_frontmatter: None,
241            sections: IndexMap::new(),
242            rules: Rules {
243                reject_unknown_frontmatter: false,
244                reject_unknown_sections: false,
245                reject_duplicate_sections: true,
246                normalize_numbered_headings: false,
247            },
248        }
249    }
250
251    fn test_rows() -> Vec<Row> {
252        vec![
253            Row::from([
254                ("path".into(), Value::String("a.md".into())),
255                ("status".into(), Value::String("ACTIVE".into())),
256                ("score".into(), Value::Int(10)),
257            ]),
258            Row::from([
259                ("path".into(), Value::String("b.md".into())),
260                ("status".into(), Value::String("KILLED".into())),
261                ("score".into(), Value::Int(5)),
262            ]),
263            Row::from([
264                ("path".into(), Value::String("c.md".into())),
265                ("status".into(), Value::String("ACTIVE".into())),
266                ("score".into(), Value::Int(20)),
267            ]),
268        ]
269    }
270
271    #[test]
272    fn test_build_and_lookup_eq() {
273        let idx = TableIndex::build(&test_rows(), &test_schema());
274        let active = idx.lookup_eq("status", &Value::String("ACTIVE".into()));
275        assert_eq!(active.len(), 2);
276        assert!(active.contains(&"a.md"));
277        assert!(active.contains(&"c.md"));
278
279        let killed = idx.lookup_eq("status", &Value::String("KILLED".into()));
280        assert_eq!(killed.len(), 1);
281        assert_eq!(killed[0], "b.md");
282    }
283
284    #[test]
285    fn test_lookup_range() {
286        let idx = TableIndex::build(&test_rows(), &test_schema());
287        let range = idx.lookup_range(
288            "score",
289            Some(&Value::Int(5)),
290            Some(&Value::Int(10)),
291        );
292        assert_eq!(range.len(), 2);
293    }
294
295    #[test]
296    fn test_lookup_in() {
297        let idx = TableIndex::build(&test_rows(), &test_schema());
298        let result = idx.lookup_in(
299            "score",
300            &[Value::Int(5), Value::Int(20)],
301        );
302        assert_eq!(result.len(), 2);
303    }
304
305    #[test]
306    fn test_invalidate_and_update() {
307        let schema = test_schema();
308        let mut idx = TableIndex::build(&test_rows(), &schema);
309
310        // Invalidate b.md
311        idx.invalidate("b.md");
312        let killed = idx.lookup_eq("status", &Value::String("KILLED".into()));
313        assert_eq!(killed.len(), 0);
314
315        // Re-add with updated status
316        let new_row = Row::from([
317            ("path".into(), Value::String("b.md".into())),
318            ("status".into(), Value::String("ACTIVE".into())),
319            ("score".into(), Value::Int(15)),
320        ]);
321        idx.update("b.md", &new_row, &schema);
322        let active = idx.lookup_eq("status", &Value::String("ACTIVE".into()));
323        assert_eq!(active.len(), 3);
324    }
325}