Skip to main content

turbovault_sql/
engine.rs

1//! Core SQL engine: session management, table building, query execution
2
3use crate::convert::{json_type_name, payload_to_json};
4use gluesql::prelude::{Glue, MemoryStorage, Payload};
5use serde_json::{Value, json};
6use std::collections::HashMap;
7use std::sync::Arc;
8use tracing::instrument;
9use turbovault_core::prelude::*;
10use turbovault_vault::VaultManager;
11
12/// SQL-based frontmatter query engine backed by GlueSQL.
13///
14/// Use [`query`](Self::query) for one-shot queries or
15/// [`session`](Self::session) to build tables once and run many queries.
16pub struct FrontmatterSqlEngine {
17    manager: Arc<VaultManager>,
18}
19
20/// A pre-built SQL session with `files`, `tags`, and `links` tables.
21///
22/// Created via [`FrontmatterSqlEngine::session`]. Reuse for multiple
23/// queries to avoid rebuilding the in-memory tables each time.
24pub struct SqlSession {
25    glue: Glue<MemoryStorage>,
26    pub file_count: usize,
27    pub tag_count: usize,
28    pub link_count: usize,
29}
30
31impl FrontmatterSqlEngine {
32    pub fn new(manager: Arc<VaultManager>) -> Self {
33        Self { manager }
34    }
35
36    /// Build all tables and return a reusable session.
37    #[instrument(skip(self), name = "sql_session_build")]
38    pub async fn session(&self) -> Result<SqlSession> {
39        let storage = MemoryStorage::default();
40        let mut glue = Glue::new(storage);
41
42        // Create all three tables
43        exec(&mut glue, "CREATE TABLE files").await?;
44        exec(&mut glue, "CREATE TABLE tags (path TEXT, tag TEXT)").await?;
45        exec(
46            &mut glue,
47            "CREATE TABLE links (source TEXT, target TEXT, link_type TEXT, is_valid BOOLEAN)",
48        )
49        .await?;
50
51        let files = self.manager.scan_vault().await?;
52        let vault_path = self.manager.vault_path();
53        let mut file_count = 0usize;
54        let mut tag_count = 0usize;
55
56        for file_path in &files {
57            if !file_path.to_string_lossy().to_lowercase().ends_with(".md") {
58                continue;
59            }
60
61            let vault_file = match self.manager.parse_file(file_path).await {
62                Ok(vf) => vf,
63                Err(_) => continue,
64            };
65
66            file_count += 1;
67
68            let rel_path = file_path
69                .strip_prefix(vault_path)
70                .map(|p| p.to_string_lossy().to_string())
71                .unwrap_or_else(|_| file_path.to_string_lossy().to_string());
72
73            // --- files table (schemaless JSON) ---
74            let mut row = serde_json::Map::new();
75            row.insert("path".to_string(), json!(rel_path));
76
77            if let Some(fm) = &vault_file.frontmatter {
78                for (key, value) in &fm.data {
79                    row.insert(key.clone(), value.clone());
80                }
81
82                // --- tags table (unnested from frontmatter) ---
83                if let Some(tags_val) = fm.data.get("tags") {
84                    let tag_strings = extract_tag_strings(tags_val);
85                    for tag in &tag_strings {
86                        let escaped_path = rel_path.replace('\'', "''");
87                        let escaped_tag = tag.replace('\'', "''");
88                        let sql =
89                            format!("INSERT INTO tags VALUES ('{escaped_path}', '{escaped_tag}')");
90                        if let Err(e) = exec(&mut glue, &sql).await {
91                            log::warn!("Tag insert error for {rel_path}: {e}");
92                        } else {
93                            tag_count += 1;
94                        }
95                    }
96                }
97            }
98
99            let json_str = serde_json::to_string(&Value::Object(row))
100                .map_err(|e| Error::config_error(format!("JSON serialization error: {e}")))?;
101            let escaped = json_str.replace('\'', "''");
102            let insert_sql = format!("INSERT INTO files VALUES ('{escaped}')");
103
104            if let Err(e) = exec(&mut glue, &insert_sql).await {
105                log::warn!("Skipping {rel_path}: insert error: {e}");
106            }
107        }
108
109        // --- links table (from link graph) ---
110        let link_count = self.populate_links(&mut glue, vault_path).await;
111
112        Ok(SqlSession {
113            glue,
114            file_count,
115            tag_count,
116            link_count,
117        })
118    }
119
120    /// One-shot: build tables, execute SQL, discard.
121    #[instrument(skip(self), fields(sql = sql), name = "sql_query")]
122    pub async fn query(&self, sql: &str) -> Result<Value> {
123        let mut session = self.session().await?;
124        session.query(sql).await
125    }
126
127    /// Inspect the frontmatter schema across all vault files.
128    #[instrument(skip(self), name = "sql_inspect")]
129    pub async fn inspect(&self) -> Result<Value> {
130        let files = self.manager.scan_vault().await?;
131        let vault_path = self.manager.vault_path();
132        let mut schema: HashMap<String, SchemaInfo> = HashMap::new();
133        let mut file_count = 0usize;
134        let mut sample_paths: Vec<String> = Vec::new();
135
136        for file_path in &files {
137            if !file_path.to_string_lossy().to_lowercase().ends_with(".md") {
138                continue;
139            }
140
141            let vault_file = match self.manager.parse_file(file_path).await {
142                Ok(vf) => vf,
143                Err(_) => continue,
144            };
145
146            file_count += 1;
147
148            if sample_paths.len() < 3 {
149                let rel = file_path
150                    .strip_prefix(vault_path)
151                    .map(|p| p.to_string_lossy().to_string())
152                    .unwrap_or_else(|_| file_path.to_string_lossy().to_string());
153                sample_paths.push(rel);
154            }
155
156            if let Some(fm) = &vault_file.frontmatter {
157                for (key, value) in &fm.data {
158                    let info = schema.entry(key.clone()).or_insert_with(|| SchemaInfo {
159                        type_name: "null".to_string(),
160                        count: 0,
161                        nullable: true,
162                    });
163                    info.count += 1;
164                    let observed = json_type_name(value);
165                    if info.type_name == "null" {
166                        info.type_name = observed.to_string();
167                    } else if info.type_name != observed && observed != "null" {
168                        info.type_name = "mixed".to_string();
169                    }
170                }
171            }
172        }
173
174        for info in schema.values_mut() {
175            info.nullable = info.count < file_count;
176        }
177
178        let mut schema_json = serde_json::Map::new();
179        schema_json.insert(
180            "path".to_string(),
181            json!({"type": "string", "nullable": false, "count": file_count}),
182        );
183        for (key, info) in &schema {
184            schema_json.insert(
185                key.clone(),
186                json!({
187                    "type": info.type_name,
188                    "nullable": info.nullable,
189                    "count": info.count
190                }),
191            );
192        }
193
194        Ok(json!({
195            "file_count": file_count,
196            "column_count": schema_json.len(),
197            "schema": schema_json,
198            "tables": {
199                "files": "Schemaless — one row per note with path + all frontmatter keys as columns",
200                "tags": "Structured (path TEXT, tag TEXT) — unnested from frontmatter tags arrays",
201                "links": "Structured (source TEXT, target TEXT, link_type TEXT, is_valid BOOLEAN) — from vault link graph"
202            },
203            "sample_paths": sample_paths,
204            "usage": "Call query_frontmatter_sql with SQL against the files, tags, or links tables"
205        }))
206    }
207
208    /// Populate the `links` table from the vault link graph.
209    async fn populate_links(
210        &self,
211        glue: &mut Glue<MemoryStorage>,
212        vault_path: &std::path::Path,
213    ) -> usize {
214        let graph = self.manager.link_graph();
215        let graph_read = graph.read().await;
216        let all_links = graph_read.all_links();
217        let mut count = 0usize;
218
219        for (source_path, links) in &all_links {
220            let source_rel = source_path
221                .strip_prefix(vault_path)
222                .map(|p| p.to_string_lossy().to_string())
223                .unwrap_or_else(|_| source_path.to_string_lossy().to_string());
224            let escaped_source = source_rel.replace('\'', "''");
225
226            for link in links {
227                let escaped_target = link.target.replace('\'', "''");
228                let link_type = format!("{:?}", link.type_);
229                let is_valid = link.is_valid;
230
231                let sql = format!(
232                    "INSERT INTO links VALUES ('{escaped_source}', '{escaped_target}', '{link_type}', {is_valid})"
233                );
234                if exec(glue, &sql).await.is_ok() {
235                    count += 1;
236                }
237            }
238        }
239
240        count
241    }
242}
243
244impl SqlSession {
245    /// Execute a SQL query against the pre-built tables.
246    pub async fn query(&mut self, sql: &str) -> Result<Value> {
247        let payloads = self
248            .glue
249            .execute(sql)
250            .await
251            .map_err(|e| Error::config_error(format!("SQL error: {e}")))?;
252
253        let result = if payloads.len() == 1 {
254            payload_to_json(payloads.into_iter().next().unwrap())
255        } else {
256            Value::Array(payloads.into_iter().map(payload_to_json).collect())
257        };
258
259        Ok(json!({
260            "file_count": self.file_count,
261            "tag_count": self.tag_count,
262            "link_count": self.link_count,
263            "result": result
264        }))
265    }
266}
267
268struct SchemaInfo {
269    type_name: String,
270    count: usize,
271    nullable: bool,
272}
273
274/// Extract tag strings from a frontmatter value (handles arrays and comma-separated strings).
275fn extract_tag_strings(value: &Value) -> Vec<String> {
276    match value {
277        Value::Array(arr) => arr
278            .iter()
279            .filter_map(|v| v.as_str())
280            .map(|s| s.strip_prefix('#').unwrap_or(s).to_string())
281            .collect(),
282        Value::String(s) => s
283            .split(',')
284            .map(|t| {
285                let trimmed = t.trim();
286                trimmed.strip_prefix('#').unwrap_or(trimmed).to_string()
287            })
288            .filter(|t| !t.is_empty())
289            .collect(),
290        _ => vec![],
291    }
292}
293
294/// Execute a SQL statement, mapping errors to `turbovault_core::Error`.
295async fn exec(glue: &mut Glue<MemoryStorage>, sql: &str) -> Result<Vec<Payload>> {
296    glue.execute(sql)
297        .await
298        .map_err(|e| Error::config_error(format!("SQL error: {e}")))
299}
300
301#[cfg(test)]
302mod tests {
303    use super::*;
304
305    #[tokio::test]
306    async fn test_schemaless_roundtrip() {
307        let storage = MemoryStorage::default();
308        let mut glue = Glue::new(storage);
309
310        exec(&mut glue, "CREATE TABLE test").await.unwrap();
311        exec(
312            &mut glue,
313            r#"INSERT INTO test VALUES ('{"path": "note.md", "status": "active", "priority": 3}')"#,
314        )
315        .await
316        .unwrap();
317        exec(
318            &mut glue,
319            r#"INSERT INTO test VALUES ('{"path": "other.md", "status": "draft"}')"#,
320        )
321        .await
322        .unwrap();
323
324        let payloads = glue
325            .execute("SELECT path, status FROM test WHERE status = 'active'")
326            .await
327            .unwrap();
328
329        assert_eq!(payloads.len(), 1);
330        if let Payload::Select { labels, rows } = &payloads[0] {
331            assert_eq!(labels, &["path", "status"]);
332            assert_eq!(rows.len(), 1);
333        } else {
334            panic!("Expected Select payload");
335        }
336    }
337
338    #[tokio::test]
339    async fn test_aggregation() {
340        let storage = MemoryStorage::default();
341        let mut glue = Glue::new(storage);
342
343        exec(&mut glue, "CREATE TABLE test").await.unwrap();
344        exec(
345            &mut glue,
346            r#"INSERT INTO test VALUES ('{"status": "active"}')"#,
347        )
348        .await
349        .unwrap();
350        exec(
351            &mut glue,
352            r#"INSERT INTO test VALUES ('{"status": "active"}')"#,
353        )
354        .await
355        .unwrap();
356        exec(
357            &mut glue,
358            r#"INSERT INTO test VALUES ('{"status": "draft"}')"#,
359        )
360        .await
361        .unwrap();
362
363        let payloads = glue
364            .execute("SELECT status, COUNT(*) as cnt FROM test GROUP BY status ORDER BY cnt DESC")
365            .await
366            .unwrap();
367
368        if let Payload::Select { rows, .. } = &payloads[0] {
369            assert_eq!(rows.len(), 2);
370        } else {
371            panic!("Expected Select payload");
372        }
373    }
374
375    #[tokio::test]
376    async fn test_structured_tags_table() {
377        let storage = MemoryStorage::default();
378        let mut glue = Glue::new(storage);
379
380        exec(&mut glue, "CREATE TABLE tags (path TEXT, tag TEXT)")
381            .await
382            .unwrap();
383        exec(&mut glue, "INSERT INTO tags VALUES ('note.md', 'work')")
384            .await
385            .unwrap();
386        exec(
387            &mut glue,
388            "INSERT INTO tags VALUES ('note.md', 'important')",
389        )
390        .await
391        .unwrap();
392        exec(&mut glue, "INSERT INTO tags VALUES ('other.md', 'work')")
393            .await
394            .unwrap();
395
396        let payloads = glue
397            .execute("SELECT tag, COUNT(*) as cnt FROM tags GROUP BY tag ORDER BY cnt DESC")
398            .await
399            .unwrap();
400
401        if let Payload::Select { labels, rows } = &payloads[0] {
402            assert_eq!(labels, &["tag", "cnt"]);
403            assert_eq!(rows.len(), 2); // work=2, important=1
404        } else {
405            panic!("Expected Select payload");
406        }
407    }
408
409    #[tokio::test]
410    async fn test_join_files_and_tags() {
411        let storage = MemoryStorage::default();
412        let mut glue = Glue::new(storage);
413
414        exec(&mut glue, "CREATE TABLE files").await.unwrap();
415        exec(&mut glue, "CREATE TABLE tags (path TEXT, tag TEXT)")
416            .await
417            .unwrap();
418
419        exec(
420            &mut glue,
421            r#"INSERT INTO files VALUES ('{"path": "note.md", "status": "active"}')"#,
422        )
423        .await
424        .unwrap();
425        exec(
426            &mut glue,
427            r#"INSERT INTO files VALUES ('{"path": "other.md", "status": "draft"}')"#,
428        )
429        .await
430        .unwrap();
431        exec(&mut glue, "INSERT INTO tags VALUES ('note.md', 'work')")
432            .await
433            .unwrap();
434
435        let payloads = glue
436            .execute(
437                "SELECT f.path, f.status FROM files f JOIN tags t ON f.path = t.path WHERE t.tag = 'work'",
438            )
439            .await
440            .unwrap();
441
442        if let Payload::Select { rows, .. } = &payloads[0] {
443            assert_eq!(rows.len(), 1);
444        } else {
445            panic!("Expected Select payload");
446        }
447    }
448
449    #[tokio::test]
450    async fn test_links_table() {
451        let storage = MemoryStorage::default();
452        let mut glue = Glue::new(storage);
453
454        exec(
455            &mut glue,
456            "CREATE TABLE links (source TEXT, target TEXT, link_type TEXT, is_valid BOOLEAN)",
457        )
458        .await
459        .unwrap();
460        exec(
461            &mut glue,
462            "INSERT INTO links VALUES ('note.md', 'other.md', 'WikiLink', true)",
463        )
464        .await
465        .unwrap();
466        exec(
467            &mut glue,
468            "INSERT INTO links VALUES ('note.md', 'missing.md', 'WikiLink', false)",
469        )
470        .await
471        .unwrap();
472
473        let payloads = glue
474            .execute("SELECT source, target FROM links WHERE is_valid = false")
475            .await
476            .unwrap();
477
478        if let Payload::Select { rows, .. } = &payloads[0] {
479            assert_eq!(rows.len(), 1);
480        } else {
481            panic!("Expected Select payload");
482        }
483    }
484
485    #[test]
486    fn test_extract_tag_strings_array() {
487        let val = json!(["#work", "personal", "#urgent"]);
488        let tags = extract_tag_strings(&val);
489        assert_eq!(tags, vec!["work", "personal", "urgent"]);
490    }
491
492    #[test]
493    fn test_extract_tag_strings_csv() {
494        let val = json!("#work, personal, #urgent");
495        let tags = extract_tag_strings(&val);
496        assert_eq!(tags, vec!["work", "personal", "urgent"]);
497    }
498
499    #[test]
500    fn test_extract_tag_strings_empty() {
501        assert!(extract_tag_strings(&json!(null)).is_empty());
502        assert!(extract_tag_strings(&json!(42)).is_empty());
503    }
504}