1use crate::convert::{json_type_name, payload_to_json};
4use gluesql::prelude::{Glue, MemoryStorage, Payload};
5use serde_json::{Value, json};
6use std::collections::HashMap;
7use std::sync::Arc;
8use tracing::instrument;
9use turbovault_core::prelude::*;
10use turbovault_vault::VaultManager;
11
12pub struct FrontmatterSqlEngine {
17 manager: Arc<VaultManager>,
18}
19
20pub struct SqlSession {
25 glue: Glue<MemoryStorage>,
26 pub file_count: usize,
27 pub tag_count: usize,
28 pub link_count: usize,
29}
30
31impl FrontmatterSqlEngine {
32 pub fn new(manager: Arc<VaultManager>) -> Self {
33 Self { manager }
34 }
35
36 #[instrument(skip(self), name = "sql_session_build")]
38 pub async fn session(&self) -> Result<SqlSession> {
39 let storage = MemoryStorage::default();
40 let mut glue = Glue::new(storage);
41
42 exec(&mut glue, "CREATE TABLE files").await?;
44 exec(&mut glue, "CREATE TABLE tags (path TEXT, tag TEXT)").await?;
45 exec(
46 &mut glue,
47 "CREATE TABLE links (source TEXT, target TEXT, link_type TEXT, is_valid BOOLEAN)",
48 )
49 .await?;
50
51 let files = self.manager.scan_vault().await?;
52 let vault_path = self.manager.vault_path();
53 let mut file_count = 0usize;
54 let mut tag_count = 0usize;
55
56 for file_path in &files {
57 if !file_path.to_string_lossy().to_lowercase().ends_with(".md") {
58 continue;
59 }
60
61 let vault_file = match self.manager.parse_file(file_path).await {
62 Ok(vf) => vf,
63 Err(_) => continue,
64 };
65
66 file_count += 1;
67
68 let rel_path = file_path
69 .strip_prefix(vault_path)
70 .map(|p| p.to_string_lossy().to_string())
71 .unwrap_or_else(|_| file_path.to_string_lossy().to_string());
72
73 let mut row = serde_json::Map::new();
75 row.insert("path".to_string(), json!(rel_path));
76
77 if let Some(fm) = &vault_file.frontmatter {
78 for (key, value) in &fm.data {
79 row.insert(key.clone(), value.clone());
80 }
81
82 if let Some(tags_val) = fm.data.get("tags") {
84 let tag_strings = extract_tag_strings(tags_val);
85 for tag in &tag_strings {
86 let escaped_path = rel_path.replace('\'', "''");
87 let escaped_tag = tag.replace('\'', "''");
88 let sql =
89 format!("INSERT INTO tags VALUES ('{escaped_path}', '{escaped_tag}')");
90 if let Err(e) = exec(&mut glue, &sql).await {
91 log::warn!("Tag insert error for {rel_path}: {e}");
92 } else {
93 tag_count += 1;
94 }
95 }
96 }
97 }
98
99 let json_str = serde_json::to_string(&Value::Object(row))
100 .map_err(|e| Error::config_error(format!("JSON serialization error: {e}")))?;
101 let escaped = json_str.replace('\'', "''");
102 let insert_sql = format!("INSERT INTO files VALUES ('{escaped}')");
103
104 if let Err(e) = exec(&mut glue, &insert_sql).await {
105 log::warn!("Skipping {rel_path}: insert error: {e}");
106 }
107 }
108
109 let link_count = self.populate_links(&mut glue, vault_path).await;
111
112 Ok(SqlSession {
113 glue,
114 file_count,
115 tag_count,
116 link_count,
117 })
118 }
119
120 #[instrument(skip(self), fields(sql = sql), name = "sql_query")]
122 pub async fn query(&self, sql: &str) -> Result<Value> {
123 let mut session = self.session().await?;
124 session.query(sql).await
125 }
126
127 #[instrument(skip(self), name = "sql_inspect")]
129 pub async fn inspect(&self) -> Result<Value> {
130 let files = self.manager.scan_vault().await?;
131 let vault_path = self.manager.vault_path();
132 let mut schema: HashMap<String, SchemaInfo> = HashMap::new();
133 let mut file_count = 0usize;
134 let mut sample_paths: Vec<String> = Vec::new();
135
136 for file_path in &files {
137 if !file_path.to_string_lossy().to_lowercase().ends_with(".md") {
138 continue;
139 }
140
141 let vault_file = match self.manager.parse_file(file_path).await {
142 Ok(vf) => vf,
143 Err(_) => continue,
144 };
145
146 file_count += 1;
147
148 if sample_paths.len() < 3 {
149 let rel = file_path
150 .strip_prefix(vault_path)
151 .map(|p| p.to_string_lossy().to_string())
152 .unwrap_or_else(|_| file_path.to_string_lossy().to_string());
153 sample_paths.push(rel);
154 }
155
156 if let Some(fm) = &vault_file.frontmatter {
157 for (key, value) in &fm.data {
158 let info = schema.entry(key.clone()).or_insert_with(|| SchemaInfo {
159 type_name: "null".to_string(),
160 count: 0,
161 nullable: true,
162 });
163 info.count += 1;
164 let observed = json_type_name(value);
165 if info.type_name == "null" {
166 info.type_name = observed.to_string();
167 } else if info.type_name != observed && observed != "null" {
168 info.type_name = "mixed".to_string();
169 }
170 }
171 }
172 }
173
174 for info in schema.values_mut() {
175 info.nullable = info.count < file_count;
176 }
177
178 let mut schema_json = serde_json::Map::new();
179 schema_json.insert(
180 "path".to_string(),
181 json!({"type": "string", "nullable": false, "count": file_count}),
182 );
183 for (key, info) in &schema {
184 schema_json.insert(
185 key.clone(),
186 json!({
187 "type": info.type_name,
188 "nullable": info.nullable,
189 "count": info.count
190 }),
191 );
192 }
193
194 Ok(json!({
195 "file_count": file_count,
196 "column_count": schema_json.len(),
197 "schema": schema_json,
198 "tables": {
199 "files": "Schemaless — one row per note with path + all frontmatter keys as columns",
200 "tags": "Structured (path TEXT, tag TEXT) — unnested from frontmatter tags arrays",
201 "links": "Structured (source TEXT, target TEXT, link_type TEXT, is_valid BOOLEAN) — from vault link graph"
202 },
203 "sample_paths": sample_paths,
204 "usage": "Call query_frontmatter_sql with SQL against the files, tags, or links tables"
205 }))
206 }
207
208 async fn populate_links(
210 &self,
211 glue: &mut Glue<MemoryStorage>,
212 vault_path: &std::path::Path,
213 ) -> usize {
214 let graph = self.manager.link_graph();
215 let graph_read = graph.read().await;
216 let all_links = graph_read.all_links();
217 let mut count = 0usize;
218
219 for (source_path, links) in &all_links {
220 let source_rel = source_path
221 .strip_prefix(vault_path)
222 .map(|p| p.to_string_lossy().to_string())
223 .unwrap_or_else(|_| source_path.to_string_lossy().to_string());
224 let escaped_source = source_rel.replace('\'', "''");
225
226 for link in links {
227 let escaped_target = link.target.replace('\'', "''");
228 let link_type = format!("{:?}", link.type_);
229 let is_valid = link.is_valid;
230
231 let sql = format!(
232 "INSERT INTO links VALUES ('{escaped_source}', '{escaped_target}', '{link_type}', {is_valid})"
233 );
234 if exec(glue, &sql).await.is_ok() {
235 count += 1;
236 }
237 }
238 }
239
240 count
241 }
242}
243
244impl SqlSession {
245 pub async fn query(&mut self, sql: &str) -> Result<Value> {
247 let payloads = self
248 .glue
249 .execute(sql)
250 .await
251 .map_err(|e| Error::config_error(format!("SQL error: {e}")))?;
252
253 let result = if payloads.len() == 1 {
254 payload_to_json(payloads.into_iter().next().unwrap())
255 } else {
256 Value::Array(payloads.into_iter().map(payload_to_json).collect())
257 };
258
259 Ok(json!({
260 "file_count": self.file_count,
261 "tag_count": self.tag_count,
262 "link_count": self.link_count,
263 "result": result
264 }))
265 }
266}
267
268struct SchemaInfo {
269 type_name: String,
270 count: usize,
271 nullable: bool,
272}
273
274fn extract_tag_strings(value: &Value) -> Vec<String> {
276 match value {
277 Value::Array(arr) => arr
278 .iter()
279 .filter_map(|v| v.as_str())
280 .map(|s| s.strip_prefix('#').unwrap_or(s).to_string())
281 .collect(),
282 Value::String(s) => s
283 .split(',')
284 .map(|t| {
285 let trimmed = t.trim();
286 trimmed.strip_prefix('#').unwrap_or(trimmed).to_string()
287 })
288 .filter(|t| !t.is_empty())
289 .collect(),
290 _ => vec![],
291 }
292}
293
294async fn exec(glue: &mut Glue<MemoryStorage>, sql: &str) -> Result<Vec<Payload>> {
296 glue.execute(sql)
297 .await
298 .map_err(|e| Error::config_error(format!("SQL error: {e}")))
299}
300
301#[cfg(test)]
302mod tests {
303 use super::*;
304
305 #[tokio::test]
306 async fn test_schemaless_roundtrip() {
307 let storage = MemoryStorage::default();
308 let mut glue = Glue::new(storage);
309
310 exec(&mut glue, "CREATE TABLE test").await.unwrap();
311 exec(
312 &mut glue,
313 r#"INSERT INTO test VALUES ('{"path": "note.md", "status": "active", "priority": 3}')"#,
314 )
315 .await
316 .unwrap();
317 exec(
318 &mut glue,
319 r#"INSERT INTO test VALUES ('{"path": "other.md", "status": "draft"}')"#,
320 )
321 .await
322 .unwrap();
323
324 let payloads = glue
325 .execute("SELECT path, status FROM test WHERE status = 'active'")
326 .await
327 .unwrap();
328
329 assert_eq!(payloads.len(), 1);
330 if let Payload::Select { labels, rows } = &payloads[0] {
331 assert_eq!(labels, &["path", "status"]);
332 assert_eq!(rows.len(), 1);
333 } else {
334 panic!("Expected Select payload");
335 }
336 }
337
338 #[tokio::test]
339 async fn test_aggregation() {
340 let storage = MemoryStorage::default();
341 let mut glue = Glue::new(storage);
342
343 exec(&mut glue, "CREATE TABLE test").await.unwrap();
344 exec(
345 &mut glue,
346 r#"INSERT INTO test VALUES ('{"status": "active"}')"#,
347 )
348 .await
349 .unwrap();
350 exec(
351 &mut glue,
352 r#"INSERT INTO test VALUES ('{"status": "active"}')"#,
353 )
354 .await
355 .unwrap();
356 exec(
357 &mut glue,
358 r#"INSERT INTO test VALUES ('{"status": "draft"}')"#,
359 )
360 .await
361 .unwrap();
362
363 let payloads = glue
364 .execute("SELECT status, COUNT(*) as cnt FROM test GROUP BY status ORDER BY cnt DESC")
365 .await
366 .unwrap();
367
368 if let Payload::Select { rows, .. } = &payloads[0] {
369 assert_eq!(rows.len(), 2);
370 } else {
371 panic!("Expected Select payload");
372 }
373 }
374
375 #[tokio::test]
376 async fn test_structured_tags_table() {
377 let storage = MemoryStorage::default();
378 let mut glue = Glue::new(storage);
379
380 exec(&mut glue, "CREATE TABLE tags (path TEXT, tag TEXT)")
381 .await
382 .unwrap();
383 exec(&mut glue, "INSERT INTO tags VALUES ('note.md', 'work')")
384 .await
385 .unwrap();
386 exec(
387 &mut glue,
388 "INSERT INTO tags VALUES ('note.md', 'important')",
389 )
390 .await
391 .unwrap();
392 exec(&mut glue, "INSERT INTO tags VALUES ('other.md', 'work')")
393 .await
394 .unwrap();
395
396 let payloads = glue
397 .execute("SELECT tag, COUNT(*) as cnt FROM tags GROUP BY tag ORDER BY cnt DESC")
398 .await
399 .unwrap();
400
401 if let Payload::Select { labels, rows } = &payloads[0] {
402 assert_eq!(labels, &["tag", "cnt"]);
403 assert_eq!(rows.len(), 2); } else {
405 panic!("Expected Select payload");
406 }
407 }
408
409 #[tokio::test]
410 async fn test_join_files_and_tags() {
411 let storage = MemoryStorage::default();
412 let mut glue = Glue::new(storage);
413
414 exec(&mut glue, "CREATE TABLE files").await.unwrap();
415 exec(&mut glue, "CREATE TABLE tags (path TEXT, tag TEXT)")
416 .await
417 .unwrap();
418
419 exec(
420 &mut glue,
421 r#"INSERT INTO files VALUES ('{"path": "note.md", "status": "active"}')"#,
422 )
423 .await
424 .unwrap();
425 exec(
426 &mut glue,
427 r#"INSERT INTO files VALUES ('{"path": "other.md", "status": "draft"}')"#,
428 )
429 .await
430 .unwrap();
431 exec(&mut glue, "INSERT INTO tags VALUES ('note.md', 'work')")
432 .await
433 .unwrap();
434
435 let payloads = glue
436 .execute(
437 "SELECT f.path, f.status FROM files f JOIN tags t ON f.path = t.path WHERE t.tag = 'work'",
438 )
439 .await
440 .unwrap();
441
442 if let Payload::Select { rows, .. } = &payloads[0] {
443 assert_eq!(rows.len(), 1);
444 } else {
445 panic!("Expected Select payload");
446 }
447 }
448
449 #[tokio::test]
450 async fn test_links_table() {
451 let storage = MemoryStorage::default();
452 let mut glue = Glue::new(storage);
453
454 exec(
455 &mut glue,
456 "CREATE TABLE links (source TEXT, target TEXT, link_type TEXT, is_valid BOOLEAN)",
457 )
458 .await
459 .unwrap();
460 exec(
461 &mut glue,
462 "INSERT INTO links VALUES ('note.md', 'other.md', 'WikiLink', true)",
463 )
464 .await
465 .unwrap();
466 exec(
467 &mut glue,
468 "INSERT INTO links VALUES ('note.md', 'missing.md', 'WikiLink', false)",
469 )
470 .await
471 .unwrap();
472
473 let payloads = glue
474 .execute("SELECT source, target FROM links WHERE is_valid = false")
475 .await
476 .unwrap();
477
478 if let Payload::Select { rows, .. } = &payloads[0] {
479 assert_eq!(rows.len(), 1);
480 } else {
481 panic!("Expected Select payload");
482 }
483 }
484
485 #[test]
486 fn test_extract_tag_strings_array() {
487 let val = json!(["#work", "personal", "#urgent"]);
488 let tags = extract_tag_strings(&val);
489 assert_eq!(tags, vec!["work", "personal", "urgent"]);
490 }
491
492 #[test]
493 fn test_extract_tag_strings_csv() {
494 let val = json!("#work, personal, #urgent");
495 let tags = extract_tag_strings(&val);
496 assert_eq!(tags, vec!["work", "personal", "urgent"]);
497 }
498
499 #[test]
500 fn test_extract_tag_strings_empty() {
501 assert!(extract_tag_strings(&json!(null)).is_empty());
502 assert!(extract_tag_strings(&json!(42)).is_empty());
503 }
504}