Skip to main content

open_kioku_storage_sqlite/
lib.rs

1use open_kioku_core::{
2    AnalysisFact, CodeChunk, EvidenceSourceType, File, FileId, GraphEdge, GraphNode, Import,
3    IndexManifest, Symbol, SymbolId, SymbolOccurrence, TestTarget,
4};
5use open_kioku_errors::{OkError, Result};
6use open_kioku_storage::{GraphStore, IndexData, MetadataStore};
7use rusqlite::{params, Connection, OptionalExtension};
8use std::path::{Path, PathBuf};
9use std::sync::Mutex;
10
11pub struct SqliteStore {
12    path: PathBuf,
13    connection: Mutex<Connection>,
14}
15
16impl SqliteStore {
17    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
18        let path = path.as_ref().to_path_buf();
19        if let Some(parent) = path.parent() {
20            std::fs::create_dir_all(parent)?;
21        }
22        let connection = Connection::open_with_flags(
23            &path,
24            rusqlite::OpenFlags::SQLITE_OPEN_READ_WRITE
25                | rusqlite::OpenFlags::SQLITE_OPEN_CREATE
26                | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
27        )
28        .map_err(storage_err)?;
29        let store = Self {
30            path,
31            connection: Mutex::new(connection),
32        };
33        store.initialize()?;
34        Ok(store)
35    }
36
37    pub fn path(&self) -> &Path {
38        &self.path
39    }
40}
41
42impl MetadataStore for SqliteStore {
43    fn initialize(&self) -> Result<()> {
44        let conn = self
45            .connection
46            .lock()
47            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
48        conn.execute_batch(
49            r#"
50            PRAGMA journal_mode = WAL;
51            CREATE TABLE IF NOT EXISTS manifests (
52              id INTEGER PRIMARY KEY CHECK (id = 1),
53              json TEXT NOT NULL
54            );
55            CREATE TABLE IF NOT EXISTS files (
56              id TEXT PRIMARY KEY,
57              path TEXT NOT NULL UNIQUE,
58              json TEXT NOT NULL
59            );
60            CREATE TABLE IF NOT EXISTS symbols (
61              id TEXT PRIMARY KEY,
62              name TEXT NOT NULL,
63              qualified_name TEXT NOT NULL,
64              file_id TEXT NOT NULL,
65              json TEXT NOT NULL
66            );
67            CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
68            CREATE TABLE IF NOT EXISTS chunks (
69              id TEXT PRIMARY KEY,
70              file_id TEXT NOT NULL,
71              start_line INTEGER NOT NULL,
72              end_line INTEGER NOT NULL,
73              text TEXT NOT NULL,
74              json TEXT NOT NULL
75            );
76            CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_id);
77            CREATE TABLE IF NOT EXISTS tests (
78              id TEXT PRIMARY KEY,
79              file_id TEXT NOT NULL,
80              json TEXT NOT NULL
81            );
82            CREATE INDEX IF NOT EXISTS idx_tests_file ON tests(file_id);
83            CREATE TABLE IF NOT EXISTS imports (
84              id TEXT PRIMARY KEY,
85              file_id TEXT NOT NULL,
86              imported TEXT NOT NULL,
87              json TEXT NOT NULL
88            );
89            CREATE INDEX IF NOT EXISTS idx_imports_file ON imports(file_id);
90            CREATE TABLE IF NOT EXISTS occurrences (
91              id TEXT PRIMARY KEY,
92              symbol_id TEXT NOT NULL,
93              file_id TEXT NOT NULL,
94              is_definition INTEGER NOT NULL,
95              json TEXT NOT NULL
96            );
97            CREATE INDEX IF NOT EXISTS idx_occurrences_symbol ON occurrences(symbol_id);
98            CREATE INDEX IF NOT EXISTS idx_occurrences_file ON occurrences(file_id);
99            CREATE TABLE IF NOT EXISTS analysis_facts (
100              id TEXT PRIMARY KEY,
101              file_id TEXT NOT NULL,
102              source_type TEXT NOT NULL,
103              target TEXT NOT NULL,
104              json TEXT NOT NULL
105            );
106            CREATE INDEX IF NOT EXISTS idx_analysis_facts_file ON analysis_facts(file_id);
107            CREATE INDEX IF NOT EXISTS idx_analysis_facts_source ON analysis_facts(source_type);
108            CREATE TABLE IF NOT EXISTS vector_targets (
109              id TEXT PRIMARY KEY,
110              file_id TEXT NOT NULL,
111              target_kind TEXT NOT NULL,
112              content_hash TEXT NOT NULL,
113              vector_id INTEGER NOT NULL,
114              model TEXT NOT NULL,
115              dimensions INTEGER NOT NULL,
116              json TEXT NOT NULL
117            );
118            CREATE INDEX IF NOT EXISTS idx_vector_targets_file ON vector_targets(file_id);
119            CREATE TABLE IF NOT EXISTS embedding_cache (
120              cache_key TEXT PRIMARY KEY,
121              target_id TEXT NOT NULL,
122              content_hash TEXT NOT NULL,
123              model TEXT NOT NULL,
124              dimensions INTEGER NOT NULL,
125              json TEXT NOT NULL
126            );
127            CREATE TABLE IF NOT EXISTS semantic_index_runs (
128              id TEXT PRIMARY KEY,
129              status TEXT NOT NULL,
130              model TEXT NOT NULL,
131              dimensions INTEGER NOT NULL,
132              vector_count INTEGER NOT NULL,
133              created_at TEXT NOT NULL,
134              json TEXT NOT NULL
135            );
136            CREATE TABLE IF NOT EXISTS semantic_coverage (
137              id TEXT PRIMARY KEY,
138              target_kind TEXT NOT NULL,
139              indexed_count INTEGER NOT NULL,
140              stale_count INTEGER NOT NULL,
141              failed_count INTEGER NOT NULL,
142              json TEXT NOT NULL
143            );
144            CREATE TABLE IF NOT EXISTS graph_nodes (
145              id TEXT PRIMARY KEY,
146              label TEXT NOT NULL,
147              json TEXT NOT NULL
148            );
149            CREATE TABLE IF NOT EXISTS graph_edges (
150              id TEXT PRIMARY KEY,
151              from_id TEXT NOT NULL,
152              to_id TEXT NOT NULL,
153              edge_type TEXT NOT NULL,
154              json TEXT NOT NULL
155            );
156            CREATE INDEX IF NOT EXISTS idx_graph_edges_from ON graph_edges(from_id);
157            CREATE INDEX IF NOT EXISTS idx_graph_edges_to ON graph_edges(to_id);
158            "#,
159        )
160        .map_err(storage_err)?;
161        Ok(())
162    }
163
164    fn put_manifest(&self, manifest: &IndexManifest) -> Result<()> {
165        let conn = self
166            .connection
167            .lock()
168            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
169        let json = serde_json::to_string(manifest)?;
170        conn.execute(
171            "INSERT INTO manifests(id, json) VALUES(1, ?1) ON CONFLICT(id) DO UPDATE SET json = excluded.json",
172            params![json],
173        )
174        .map_err(storage_err)?;
175        Ok(())
176    }
177
178    fn manifest(&self) -> Result<Option<IndexManifest>> {
179        let conn = self
180            .connection
181            .lock()
182            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
183        let raw: Option<String> = conn
184            .query_row("SELECT json FROM manifests WHERE id = 1", [], |row| {
185                row.get(0)
186            })
187            .optional()
188            .map_err(storage_err)?;
189        raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
190            .transpose()
191    }
192
193    fn replace_index(&self, data: IndexData<'_>) -> Result<()> {
194        let mut conn = self
195            .connection
196            .lock()
197            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
198        let tx = conn.transaction().map_err(storage_err)?;
199        tx.execute("DELETE FROM occurrences", [])
200            .map_err(storage_err)?;
201        tx.execute("DELETE FROM analysis_facts", [])
202            .map_err(storage_err)?;
203        tx.execute("DELETE FROM imports", []).map_err(storage_err)?;
204        tx.execute("DELETE FROM tests", []).map_err(storage_err)?;
205        tx.execute("DELETE FROM chunks", []).map_err(storage_err)?;
206        tx.execute("DELETE FROM symbols", []).map_err(storage_err)?;
207        tx.execute("DELETE FROM files", []).map_err(storage_err)?;
208        tx.execute("DELETE FROM manifests", [])
209            .map_err(storage_err)?;
210        tx.execute(
211            "INSERT INTO manifests(id, json) VALUES(1, ?1)",
212            params![serde_json::to_string(data.manifest)?],
213        )
214        .map_err(storage_err)?;
215        for file in data.files {
216            tx.execute(
217                "INSERT INTO files(id, path, json) VALUES(?1, ?2, ?3)",
218                params![
219                    &file.id.0,
220                    file.path.to_string_lossy().as_ref(),
221                    serde_json::to_string(file)?
222                ],
223            )
224            .map_err(storage_err)?;
225        }
226        for symbol in data.symbols {
227            tx.execute(
228                "INSERT INTO symbols(id, name, qualified_name, file_id, json) VALUES(?1, ?2, ?3, ?4, ?5)",
229                params![
230                    &symbol.id.0,
231                    &symbol.name,
232                    &symbol.qualified_name,
233                    &symbol.file_id.0,
234                    serde_json::to_string(symbol)?
235                ],
236            )
237            .map_err(storage_err)?;
238        }
239        for chunk in data.chunks {
240            tx.execute(
241                "INSERT INTO chunks(id, file_id, start_line, end_line, text, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6)",
242                params![
243                    &chunk.id,
244                    &chunk.file_id.0,
245                    chunk.range.start,
246                    chunk.range.end,
247                    &chunk.text,
248                    serde_json::to_string(chunk)?
249                ],
250            )
251            .map_err(storage_err)?;
252        }
253        for test in data.tests {
254            tx.execute(
255                "INSERT INTO tests(id, file_id, json) VALUES(?1, ?2, ?3) ON CONFLICT(id) DO UPDATE SET json = excluded.json",
256                params![&test.id, &test.file_id.0, serde_json::to_string(test)?],
257            )
258            .map_err(storage_err)?;
259        }
260        for import in data.imports {
261            tx.execute(
262                "INSERT INTO imports(id, file_id, imported, json) VALUES(?1, ?2, ?3, ?4)",
263                params![
264                    occurrence_id(
265                        &import.file_id.0,
266                        &import.imported,
267                        import.range.as_ref().map(|range| range.start),
268                        true
269                    ),
270                    &import.file_id.0,
271                    &import.imported,
272                    serde_json::to_string(import)?
273                ],
274            )
275            .map_err(storage_err)?;
276        }
277        for occurrence in data.occurrences {
278            tx.execute(
279                "INSERT INTO occurrences(id, symbol_id, file_id, is_definition, json) VALUES(?1, ?2, ?3, ?4, ?5)",
280                params![
281                    occurrence_id(
282                        &occurrence.file_id.0,
283                        &occurrence.symbol_id.0,
284                        occurrence.range.as_ref().map(|range| range.start),
285                        occurrence.is_definition,
286                    ),
287                    &occurrence.symbol_id.0,
288                    &occurrence.file_id.0,
289                    if occurrence.is_definition { 1 } else { 0 },
290                    serde_json::to_string(occurrence)?
291                ],
292            )
293            .map_err(storage_err)?;
294        }
295        for fact in data.analysis_facts {
296            tx.execute(
297                "INSERT INTO analysis_facts(id, file_id, source_type, target, json) VALUES(?1, ?2, ?3, ?4, ?5)",
298                params![
299                    &fact.id,
300                    &fact.file_id.0,
301                    source_type_name(&fact.source_type),
302                    &fact.target,
303                    serde_json::to_string(fact)?
304                ],
305            )
306            .map_err(storage_err)?;
307        }
308        tx.commit().map_err(storage_err)?;
309        Ok(())
310    }
311
312    fn list_files(&self, limit: usize, offset: usize) -> Result<Vec<File>> {
313        let conn = self
314            .connection
315            .lock()
316            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
317        let mut stmt = conn
318            .prepare("SELECT json FROM files ORDER BY path LIMIT ?1 OFFSET ?2")
319            .map_err(storage_err)?;
320        let rows = stmt
321            .query_map(params![limit as i64, offset as i64], |row| {
322                row.get::<_, String>(0)
323            })
324            .map_err(storage_err)?;
325        collect_json(rows)
326    }
327
328    fn get_file_by_path(&self, path: &Path) -> Result<Option<File>> {
329        let conn = self
330            .connection
331            .lock()
332            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
333        let raw: Option<String> = conn
334            .query_row(
335                "SELECT json FROM files WHERE path = ?1",
336                params![path.to_string_lossy().as_ref()],
337                |row| row.get(0),
338            )
339            .optional()
340            .map_err(storage_err)?;
341        raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
342            .transpose()
343    }
344
345    fn list_symbols(
346        &self,
347        query: Option<&str>,
348        limit: usize,
349        offset: usize,
350    ) -> Result<Vec<Symbol>> {
351        let conn = self
352            .connection
353            .lock()
354            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
355        let pattern = format!("%{}%", query.unwrap_or_default());
356        let mut stmt = conn
357            .prepare(
358                "SELECT json FROM symbols WHERE (?1 = '%%' OR name LIKE ?1 COLLATE NOCASE OR qualified_name LIKE ?1 COLLATE NOCASE) ORDER BY qualified_name LIMIT ?2 OFFSET ?3",
359            )
360            .map_err(storage_err)?;
361        let rows = stmt
362            .query_map(params![pattern, limit as i64, offset as i64], |row| {
363                row.get::<_, String>(0)
364            })
365            .map_err(storage_err)?;
366        collect_json(rows)
367    }
368
369    fn symbol_by_id(&self, id: &SymbolId) -> Result<Option<Symbol>> {
370        let conn = self
371            .connection
372            .lock()
373            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
374        let raw: Option<String> = conn
375            .query_row(
376                "SELECT json FROM symbols WHERE id = ?1",
377                params![&id.0],
378                |row| row.get(0),
379            )
380            .optional()
381            .map_err(storage_err)?;
382        raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
383            .transpose()
384    }
385
386    fn chunks_for_file(&self, file_id: &FileId) -> Result<Vec<CodeChunk>> {
387        let conn = self
388            .connection
389            .lock()
390            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
391        let mut stmt = conn
392            .prepare("SELECT json FROM chunks WHERE file_id = ?1 ORDER BY start_line")
393            .map_err(storage_err)?;
394        let rows = stmt
395            .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
396            .map_err(storage_err)?;
397        collect_json(rows)
398    }
399
400    fn all_chunks(&self) -> Result<Vec<CodeChunk>> {
401        let conn = self
402            .connection
403            .lock()
404            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
405        let mut stmt = conn
406            .prepare("SELECT json FROM chunks ORDER BY file_id, start_line")
407            .map_err(storage_err)?;
408        let rows = stmt
409            .query_map([], |row| row.get::<_, String>(0))
410            .map_err(storage_err)?;
411        collect_json(rows)
412    }
413
414    fn tests(&self) -> Result<Vec<TestTarget>> {
415        let conn = self
416            .connection
417            .lock()
418            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
419        let mut stmt = conn
420            .prepare("SELECT json FROM tests ORDER BY file_id")
421            .map_err(storage_err)?;
422        let rows = stmt
423            .query_map([], |row| row.get::<_, String>(0))
424            .map_err(storage_err)?;
425        collect_json(rows)
426    }
427
428    fn imports(&self) -> Result<Vec<Import>> {
429        let conn = self
430            .connection
431            .lock()
432            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
433        let mut stmt = conn
434            .prepare("SELECT json FROM imports ORDER BY file_id")
435            .map_err(storage_err)?;
436        let rows = stmt
437            .query_map([], |row| row.get::<_, String>(0))
438            .map_err(storage_err)?;
439        collect_json(rows)
440    }
441
442    fn analysis_facts(
443        &self,
444        source_type: Option<EvidenceSourceType>,
445        limit: usize,
446    ) -> Result<Vec<AnalysisFact>> {
447        let conn = self
448            .connection
449            .lock()
450            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
451        let limit = limit.min(i64::MAX as usize) as i64;
452        let rows = if let Some(source_type) = source_type {
453            let mut stmt = conn
454                .prepare(
455                    "SELECT json FROM analysis_facts WHERE source_type = ?1 ORDER BY file_id, target LIMIT ?2",
456                )
457                .map_err(storage_err)?;
458            let rows = stmt
459                .query_map(params![source_type_name(&source_type), limit], |row| {
460                    row.get::<_, String>(0)
461                })
462                .map_err(storage_err)?;
463            collect_json(rows)?
464        } else {
465            let mut stmt = conn
466                .prepare("SELECT json FROM analysis_facts ORDER BY file_id, target LIMIT ?1")
467                .map_err(storage_err)?;
468            let rows = stmt
469                .query_map(params![limit], |row| row.get::<_, String>(0))
470                .map_err(storage_err)?;
471            collect_json(rows)?
472        };
473        Ok(rows)
474    }
475
476    fn references_for_symbol(&self, id: &SymbolId, limit: usize) -> Result<Vec<SymbolOccurrence>> {
477        let conn = self
478            .connection
479            .lock()
480            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
481        let mut stmt = conn
482            .prepare(
483                "SELECT json FROM occurrences WHERE symbol_id = ?1 AND is_definition = 0 ORDER BY file_id LIMIT ?2",
484            )
485            .map_err(storage_err)?;
486        let rows = stmt
487            .query_map(params![&id.0, limit as i64], |row| row.get::<_, String>(0))
488            .map_err(storage_err)?;
489        collect_json(rows)
490    }
491
492    fn occurrences_for_file(&self, file_id: &FileId) -> Result<Vec<SymbolOccurrence>> {
493        let conn = self
494            .connection
495            .lock()
496            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
497        let mut stmt = conn
498            .prepare("SELECT json FROM occurrences WHERE file_id = ?1 ORDER BY symbol_id")
499            .map_err(storage_err)?;
500        let rows = stmt
501            .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
502            .map_err(storage_err)?;
503        collect_json(rows)
504    }
505
506    fn symbols_for_file(&self, file_id: &FileId) -> Result<Vec<Symbol>> {
507        let conn = self
508            .connection
509            .lock()
510            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
511        let mut stmt = conn
512            .prepare("SELECT json FROM symbols WHERE file_id = ?1 ORDER BY name")
513            .map_err(storage_err)?;
514        let rows = stmt
515            .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
516            .map_err(storage_err)?;
517        collect_json(rows)
518    }
519
520    fn find_chunks_containing(&self, query: &str, limit: usize) -> Result<Vec<CodeChunk>> {
521        let conn = self
522            .connection
523            .lock()
524            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
525        let pattern = format!("%{}%", query);
526        let mut stmt = conn
527            .prepare("SELECT json FROM chunks WHERE text LIKE ?1 LIMIT ?2")
528            .map_err(storage_err)?;
529        let rows = stmt
530            .query_map(params![pattern, limit as i64], |row| {
531                row.get::<_, String>(0)
532            })
533            .map_err(storage_err)?;
534        collect_json(rows)
535    }
536
537    fn find_files_by_path_pattern(&self, pattern: &str) -> Result<Vec<File>> {
538        let conn = self
539            .connection
540            .lock()
541            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
542        let match_pat = format!("%{}%", pattern);
543        let mut stmt = conn
544            .prepare("SELECT json FROM files WHERE path LIKE ?1 COLLATE NOCASE")
545            .map_err(storage_err)?;
546        let rows = stmt
547            .query_map(params![match_pat], |row| row.get::<_, String>(0))
548            .map_err(storage_err)?;
549        collect_json(rows)
550    }
551
552    fn tests_for_files(&self, file_ids: &[FileId]) -> Result<Vec<TestTarget>> {
553        if file_ids.is_empty() {
554            return Ok(Vec::new());
555        }
556        let conn = self
557            .connection
558            .lock()
559            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
560
561        let placeholders = file_ids.iter().map(|_| "?").collect::<Vec<_>>().join(",");
562        let sql = format!("SELECT json FROM tests WHERE file_id IN ({})", placeholders);
563        let mut stmt = conn.prepare(&sql).map_err(storage_err)?;
564
565        let params = rusqlite::params_from_iter(file_ids.iter().map(|id| &id.0));
566        let rows = stmt
567            .query_map(params, |row| row.get::<_, String>(0))
568            .map_err(storage_err)?;
569        collect_json(rows)
570    }
571}
572
573impl GraphStore for SqliteStore {
574    fn replace_graph(&self, nodes: &[GraphNode], edges: &[GraphEdge]) -> Result<()> {
575        let mut conn = self
576            .connection
577            .lock()
578            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
579        let tx = conn.transaction().map_err(storage_err)?;
580        tx.execute("DELETE FROM graph_edges", [])
581            .map_err(storage_err)?;
582        tx.execute("DELETE FROM graph_nodes", [])
583            .map_err(storage_err)?;
584        for node in nodes {
585            tx.execute(
586                "INSERT INTO graph_nodes(id, label, json) VALUES(?1, ?2, ?3)",
587                params![&node.id.0, &node.label, serde_json::to_string(node)?],
588            )
589            .map_err(storage_err)?;
590        }
591        for edge in edges {
592            tx.execute(
593                "INSERT INTO graph_edges(id, from_id, to_id, edge_type, json) VALUES(?1, ?2, ?3, ?4, ?5)",
594                params![
595                    &edge.id.0,
596                    &edge.from.0,
597                    &edge.to.0,
598                    format!("{:?}", edge.edge_type),
599                    serde_json::to_string(edge)?
600                ],
601            )
602            .map_err(storage_err)?;
603        }
604        tx.commit().map_err(storage_err)?;
605        Ok(())
606    }
607
608    fn neighbors(&self, node: &str, limit: usize) -> Result<(Vec<GraphNode>, Vec<GraphEdge>)> {
609        let conn = self
610            .connection
611            .lock()
612            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
613        let mut stmt = conn
614            .prepare("SELECT json FROM graph_edges WHERE from_id = ?1 OR to_id = ?1 LIMIT ?2")
615            .map_err(storage_err)?;
616        let rows = stmt
617            .query_map(params![node, limit as i64], |row| row.get::<_, String>(0))
618            .map_err(storage_err)?;
619        let edges: Vec<GraphEdge> = collect_json(rows)?;
620        let mut ids = edges
621            .iter()
622            .flat_map(|edge| [edge.from.0.clone(), edge.to.0.clone()])
623            .collect::<Vec<_>>();
624        ids.sort();
625        ids.dedup();
626        let mut nodes = Vec::new();
627        for id in ids {
628            if let Some(node) = graph_node_by_id(&conn, &id)? {
629                nodes.push(node);
630            }
631        }
632        Ok((nodes, edges))
633    }
634
635    fn shortest_path(&self, from: &str, to: &str, max_depth: usize) -> Result<Vec<GraphEdge>> {
636        use std::collections::{HashSet, VecDeque};
637
638        let conn = self
639            .connection
640            .lock()
641            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
642
643        // Prepare the statement once outside the BFS loop to avoid
644        // O(N) statement recompilation on large graphs.
645        let mut edge_stmt = conn
646            .prepare("SELECT json FROM graph_edges WHERE from_id = ?1")
647            .map_err(storage_err)?;
648
649        let mut queue = VecDeque::from([(from.to_string(), Vec::<GraphEdge>::new())]);
650        let mut seen = HashSet::new();
651        while let Some((node, path)) = queue.pop_front() {
652            if node == to {
653                return Ok(path);
654            }
655            if path.len() >= max_depth || !seen.insert(node.clone()) {
656                continue;
657            }
658            let rows = edge_stmt
659                .query_map(params![&node], |row| row.get::<_, String>(0))
660                .map_err(storage_err)?;
661            let edges: Vec<GraphEdge> = collect_json(rows)?;
662            for edge in edges {
663                let mut next_path = path.clone();
664                next_path.push(edge.clone());
665                queue.push_back((edge.to.0.clone(), next_path));
666            }
667        }
668        Ok(Vec::new())
669    }
670}
671
672fn collect_json<T, F>(rows: rusqlite::MappedRows<'_, F>) -> Result<Vec<T>>
673where
674    F: FnMut(&rusqlite::Row<'_>) -> rusqlite::Result<String>,
675    T: serde::de::DeserializeOwned,
676{
677    let mut out = Vec::new();
678    for row in rows {
679        let raw = row.map_err(storage_err)?;
680        out.push(serde_json::from_str(&raw)?);
681    }
682    Ok(out)
683}
684
685fn graph_node_by_id(conn: &Connection, id: &str) -> Result<Option<GraphNode>> {
686    let raw: Option<String> = conn
687        .query_row(
688            "SELECT json FROM graph_nodes WHERE id = ?1",
689            params![id],
690            |row| row.get(0),
691        )
692        .optional()
693        .map_err(storage_err)?;
694    raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
695        .transpose()
696}
697
698fn storage_err(err: rusqlite::Error) -> OkError {
699    OkError::Storage(err.to_string())
700}
701
702fn occurrence_id(file_id: &str, value: &str, line: Option<u32>, flag: bool) -> String {
703    use sha2::{Digest, Sha256};
704    let mut hasher = Sha256::new();
705    hasher.update(file_id.as_bytes());
706    hasher.update(b":");
707    hasher.update(value.as_bytes());
708    hasher.update(b":");
709    hasher.update(line.unwrap_or_default().to_string().as_bytes());
710    hasher.update(b":");
711    hasher.update(if flag { b"1" } else { b"0" });
712    format!("{:x}", hasher.finalize())
713}
714
715fn source_type_name(source_type: &EvidenceSourceType) -> &'static str {
716    match source_type {
717        EvidenceSourceType::TreeSitter => "tree_sitter",
718        EvidenceSourceType::Scip => "scip",
719        EvidenceSourceType::Lsp => "lsp",
720        EvidenceSourceType::Regex => "regex",
721        EvidenceSourceType::Lexical => "lexical",
722        EvidenceSourceType::Semantic => "semantic",
723        EvidenceSourceType::Runtime => "runtime",
724        EvidenceSourceType::GitHistory => "git_history",
725        EvidenceSourceType::StaticAnalysis => "static_analysis",
726        EvidenceSourceType::ExternalIntegration => "external_integration",
727        EvidenceSourceType::Heuristic => "heuristic",
728    }
729}
730
731#[cfg(test)]
732mod tests {
733    use super::SqliteStore;
734    use chrono::Utc;
735    use open_kioku_core::{
736        AnalysisFact, Confidence, EdgeId, Evidence, EvidenceId, EvidenceSourceType, File, FileId,
737        GraphEdge, GraphEdgeType, GraphNode, GraphNodeType, IndexManifest, IndexQuality, Language,
738        LineRange, NodeId, Repository, RepositoryId, Symbol, SymbolId, SymbolKind,
739    };
740    use open_kioku_storage::{GraphStore, IndexData, MetadataStore};
741
742    fn make_store() -> SqliteStore {
743        SqliteStore::open(":memory:").expect("in-memory store")
744    }
745
746    fn make_file(id: &str, path: &str) -> File {
747        File {
748            id: FileId::new(id),
749            repository_id: RepositoryId::new("repo"),
750            path: path.into(),
751            language: Language::Rust,
752            size_bytes: 100,
753            content_hash: format!("hash-{id}"),
754            is_generated: false,
755            is_vendor: false,
756        }
757    }
758
759    fn make_symbol(id: &str, name: &str, file_id: &str) -> Symbol {
760        Symbol {
761            id: SymbolId::new(id),
762            name: name.into(),
763            qualified_name: format!("module::{name}"),
764            kind: SymbolKind::Function,
765            file_id: FileId::new(file_id),
766            range: Some(LineRange::single(1)),
767            language: Language::Rust,
768            confidence: Confidence::High,
769            provenance: EvidenceSourceType::TreeSitter,
770        }
771    }
772
773    fn evidence() -> Evidence {
774        Evidence {
775            id: EvidenceId::new("ev-1"),
776            source: "test".into(),
777            source_type: EvidenceSourceType::Lexical,
778            file_range: None,
779            symbol_id: None,
780            confidence: Confidence::Medium,
781            message: "test evidence".into(),
782            indexed_at: Utc::now(),
783        }
784    }
785
786    fn make_manifest() -> IndexManifest {
787        IndexManifest {
788            repository: Repository {
789                id: RepositoryId::new("repo"),
790                name: "repo".into(),
791                root: std::path::PathBuf::from("."),
792                branch: None,
793                commit: None,
794                indexed_at: None,
795            },
796            file_count: 2,
797            symbol_count: 2,
798            chunk_count: 0,
799            indexed_at: Utc::now(),
800            schema_version: 1,
801            quality: IndexQuality::default(),
802        }
803    }
804
/// After `replace_index`, both indexed files are listed and a file can be
/// looked up by its path.
#[test]
fn replace_index_and_list_files() {
    let store = make_store();
    let files = vec![
        make_file("f1", "src/main.rs"),
        make_file("f2", "src/lib.rs"),
    ];
    let symbols = vec![make_symbol("s1", "main_fn", "f1")];
    let manifest = make_manifest();

    store
        .replace_index(IndexData {
            manifest: &manifest,
            files: &files,
            symbols: &symbols,
            occurrences: &[],
            chunks: &[],
            imports: &[],
            tests: &[],
            analysis_facts: &[],
        })
        .unwrap();

    // Limit 100 / offset 0 is large enough to return every indexed file.
    let listed = store.list_files(100, 0).unwrap();
    assert_eq!(listed.len(), 2);

    let lookup_path = std::path::PathBuf::from("src/main.rs");
    let found = store.get_file_by_path(&lookup_path).unwrap();
    assert!(found.is_some());
    // `files[0]` is the fixture created for `src/main.rs`.
    assert_eq!(found.unwrap().id, files[0].id);
}
837
/// Analysis facts from three different evidence sources are stored by
/// `replace_index` and can be read back filtered by source type or unfiltered.
#[test]
fn replace_index_persists_analysis_facts() {
    let store = make_store();
    let file = make_file("f1", "src/handler.rs");
    let manifest = make_manifest();

    // Position matters for the expectations below:
    // index 0 = runtime fact, index 1 = static fact, index 2 = git fact.
    let facts = [
        AnalysisFact {
            id: "runtime-1".into(),
            file_id: file.id.clone(),
            symbol_id: None,
            target: "GET /api/orders".into(),
            target_kind: GraphNodeType::Endpoint,
            edge_type: GraphEdgeType::ExposesEndpoint,
            range: Some(LineRange::single(12)),
            confidence: Confidence::High,
            source: "open-kioku-runtime:.ok/runtime/spans.jsonl".into(),
            source_type: EvidenceSourceType::Runtime,
            message: "runtime endpoint observed in local trace artifact".into(),
        },
        AnalysisFact {
            id: "static-1".into(),
            file_id: file.id.clone(),
            symbol_id: None,
            target: "orders".into(),
            target_kind: GraphNodeType::DatabaseTable,
            edge_type: GraphEdgeType::ReadsTable,
            range: None,
            confidence: Confidence::Medium,
            source: "open-kioku-static".into(),
            source_type: EvidenceSourceType::StaticAnalysis,
            message: "static fact".into(),
        },
        AnalysisFact {
            id: "git-1".into(),
            file_id: file.id.clone(),
            symbol_id: None,
            target: "tests/handler_test.rs".into(),
            target_kind: GraphNodeType::Test,
            edge_type: GraphEdgeType::ChangedBy,
            range: None,
            confidence: Confidence::High,
            source: "git-history:abc123".into(),
            source_type: EvidenceSourceType::GitHistory,
            message: "git co-change observed in 1 commit(s), recency weight 1.00".into(),
        },
    ];
    let files = [file];

    store
        .replace_index(IndexData {
            manifest: &manifest,
            files: &files,
            symbols: &[],
            occurrences: &[],
            chunks: &[],
            imports: &[],
            tests: &[],
            analysis_facts: &facts,
        })
        .unwrap();

    let expected_runtime = &facts[0];
    let expected_git = &facts[2];

    let runtime_hits = store
        .analysis_facts(Some(EvidenceSourceType::Runtime), 10)
        .unwrap();
    assert_eq!(runtime_hits.len(), 1);
    assert_eq!(runtime_hits[0].id, expected_runtime.id);
    assert_eq!(runtime_hits[0].target, expected_runtime.target);

    let git_hits = store
        .analysis_facts(Some(EvidenceSourceType::GitHistory), 10)
        .unwrap();
    assert_eq!(git_hits.len(), 1);
    assert_eq!(git_hits[0].id, expected_git.id);
    assert_eq!(git_hits[0].target, expected_git.target);

    // Without a source-type filter all three facts come back.
    let every_fact = store.analysis_facts(None, 10).unwrap();
    assert_eq!(every_fact.len(), 3);
}
911
/// `list_symbols` returns every symbol when no filter is given and only the
/// matching symbol when filtered by the text `alpha`.
#[test]
fn list_symbols_with_filter() {
    let store = make_store();
    let files = vec![make_file("f1", "src/lib.rs")];
    let symbols = vec![
        make_symbol("s1", "alpha_handler", "f1"),
        make_symbol("s2", "beta_worker", "f1"),
    ];
    let manifest = make_manifest();

    store
        .replace_index(IndexData {
            manifest: &manifest,
            files: &files,
            symbols: &symbols,
            occurrences: &[],
            chunks: &[],
            imports: &[],
            tests: &[],
            analysis_facts: &[],
        })
        .unwrap();

    let unfiltered = store.list_symbols(None, 100, 0).unwrap();
    assert_eq!(unfiltered.len(), 2);

    let alpha_only = store.list_symbols(Some("alpha"), 10, 0).unwrap();
    assert_eq!(alpha_only.len(), 1);
    assert_eq!(alpha_only[0].name, "alpha_handler");
}
940
/// A two-node, one-edge graph written with `replace_graph` is visible through
/// `neighbors` when queried from the file node.
#[test]
fn replace_graph_and_neighbors() {
    let store = make_store();

    // Seed a minimal index before writing the graph. The original setup states
    // this is needed so the graph tables exist — NOTE(review): confirm.
    let files = vec![make_file("f1", "src/lib.rs")];
    let manifest = make_manifest();
    store
        .replace_index(IndexData {
            manifest: &manifest,
            files: &files,
            symbols: &[],
            occurrences: &[],
            chunks: &[],
            imports: &[],
            tests: &[],
            analysis_facts: &[],
        })
        .unwrap();

    let file_node = GraphNode {
        id: NodeId::new("file:src/lib.rs"),
        node_type: GraphNodeType::File,
        label: "src/lib.rs".into(),
        file_id: Some(FileId::new("f1")),
        symbol_id: None,
    };
    let function_node = GraphNode {
        id: NodeId::new("symbol:s1"),
        node_type: GraphNodeType::Function,
        label: "worker".into(),
        file_id: Some(FileId::new("f1")),
        symbol_id: Some(SymbolId::new("s1")),
    };
    let defines_edge = GraphEdge {
        id: EdgeId::new("e1"),
        from: file_node.id.clone(),
        to: function_node.id.clone(),
        edge_type: GraphEdgeType::Defines,
        evidence: evidence(),
    };

    // Index 0 of `graph_nodes` is the file node the query starts from.
    let graph_nodes = [file_node, function_node];
    let graph_edges = [defines_edge];
    store.replace_graph(&graph_nodes, &graph_edges).unwrap();

    let (nodes, edges) = store.neighbors("file:src/lib.rs", 10).unwrap();
    assert_eq!(edges.len(), 1);
    assert_eq!(edges[0].id.0, "e1");
    assert!(nodes.iter().any(|n| n.id == graph_nodes[0].id));
}
994
/// With a single edge `a -> b` in the graph, `shortest_path("a", "b", 5)`
/// returns exactly that edge.
#[test]
fn shortest_path_finds_direct_route() {
    let store = make_store();
    let files = vec![make_file("f1", "src/lib.rs")];
    let manifest = make_manifest();
    store
        .replace_index(IndexData {
            manifest: &manifest,
            files: &files,
            symbols: &[],
            occurrences: &[],
            chunks: &[],
            imports: &[],
            tests: &[],
            analysis_facts: &[],
        })
        .unwrap();

    let source = GraphNode {
        id: NodeId::new("a"),
        node_type: GraphNodeType::File,
        label: "a".into(),
        file_id: None,
        symbol_id: None,
    };
    let target = GraphNode {
        id: NodeId::new("b"),
        node_type: GraphNodeType::File,
        label: "b".into(),
        file_id: None,
        symbol_id: None,
    };
    // Copy the endpoint ids before the nodes are moved into the array below.
    let link = GraphEdge {
        id: EdgeId::new("a-b"),
        from: source.id.clone(),
        to: target.id.clone(),
        edge_type: GraphEdgeType::Defines,
        evidence: evidence(),
    };

    let graph_nodes = [source, target];
    let graph_edges = [link];
    store.replace_graph(&graph_nodes, &graph_edges).unwrap();

    let route = store.shortest_path("a", "b", 5).unwrap();
    assert_eq!(route.len(), 1);
    assert_eq!(route[0].id.0, "a-b");
}
1040
/// With an empty graph, `shortest_path` between two ids that were never
/// inserted succeeds and yields no edges.
#[test]
fn shortest_path_returns_empty_when_no_route() {
    let store = make_store();
    let files = vec![make_file("f1", "src/lib.rs")];
    let manifest = make_manifest();
    store
        .replace_index(IndexData {
            manifest: &manifest,
            files: &files,
            symbols: &[],
            occurrences: &[],
            chunks: &[],
            imports: &[],
            tests: &[],
            analysis_facts: &[],
        })
        .unwrap();

    // Replace the graph with nothing: no nodes and no edges.
    store.replace_graph(&[], &[]).unwrap();

    let route = store.shortest_path("x", "y", 5).unwrap();
    assert!(route.is_empty());
}
1063}