Skip to main content

open_kioku_storage_sqlite/
lib.rs

1use open_kioku_core::{
2    AnalysisFact, CodeChunk, Confidence, EvidenceSourceType, File, FileId, FileProvenance,
3    GitCochangeEdge, GitCommitId, GitCommitRecord, GitFileTouch, GitSymbolTouch, GraphEdge,
4    GraphEdgeType, GraphNode, GraphNodeType, HistoryRecordId, HistorySnapshot, HistorySummary,
5    Import, IndexManifest, ProvenanceTouch, Symbol, SymbolId, SymbolOccurrence, SymbolProvenance,
6    TestTarget, HISTORY_SCHEMA_VERSION,
7};
8use open_kioku_errors::{OkError, Result};
9use open_kioku_storage::{
10    GraphCounts, GraphSchemaCounts, GraphStore, HistoryStore, IndexData, MetadataStore,
11    PartialIndexUpdate,
12};
13use rusqlite::{params, Connection, OptionalExtension, Transaction};
14use std::collections::BTreeSet;
15use std::path::{Path, PathBuf};
16use std::sync::Mutex;
17
/// Version number for the git-history tables stored by this backend.
const SQLITE_HISTORY_SCHEMA_VERSION: i64 = 1;
/// Index schema version this backend supports; exported for other crates.
pub const SQLITE_SUPPORTED_INDEX_SCHEMA_VERSION: i64 = 2;
/// Graph schema version; defined as an alias of the supported index schema version.
const SQLITE_GRAPH_SCHEMA_VERSION: i64 = SQLITE_SUPPORTED_INDEX_SCHEMA_VERSION;
/// Overall supported schema version; also an alias of the supported index schema version.
const SQLITE_SUPPORTED_SCHEMA_VERSION: i64 = SQLITE_SUPPORTED_INDEX_SCHEMA_VERSION;
22
/// DDL for version 1 of the git-history tables.
///
/// Creates five tables (`git_commits`, `git_file_touches`, `git_symbol_touches`,
/// `git_cochange_edges`, `git_review_events`) and their lookup indexes. Every
/// statement uses `IF NOT EXISTS`, so the batch can be re-applied to a database
/// that already contains these objects. The touch tables reference
/// `git_commits(id)` with `ON DELETE CASCADE`.
const HISTORY_SCHEMA_V1: &str = r#"
CREATE TABLE IF NOT EXISTS git_commits (
  id TEXT PRIMARY KEY,
  authored_at TEXT NOT NULL,
  committed_at TEXT NOT NULL,
  author_email TEXT,
  json TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_git_commits_committed_at
  ON git_commits(committed_at DESC, id);
CREATE INDEX IF NOT EXISTS idx_git_commits_author_email
  ON git_commits(author_email);

CREATE TABLE IF NOT EXISTS git_file_touches (
  id TEXT PRIMARY KEY,
  commit_id TEXT NOT NULL,
  path TEXT NOT NULL,
  previous_path TEXT,
  touched_at TEXT NOT NULL,
  json TEXT NOT NULL,
  FOREIGN KEY(commit_id) REFERENCES git_commits(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_git_file_touches_path
  ON git_file_touches(path, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_file_touches_previous_path
  ON git_file_touches(previous_path, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_file_touches_commit
  ON git_file_touches(commit_id);

CREATE TABLE IF NOT EXISTS git_symbol_touches (
  id TEXT PRIMARY KEY,
  commit_id TEXT NOT NULL,
  symbol_id TEXT,
  qualified_name TEXT NOT NULL,
  file_path TEXT NOT NULL,
  touched_at TEXT NOT NULL,
  json TEXT NOT NULL,
  FOREIGN KEY(commit_id) REFERENCES git_commits(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_git_symbol_touches_file
  ON git_symbol_touches(file_path, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_symbol_touches_symbol
  ON git_symbol_touches(symbol_id, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_symbol_touches_commit
  ON git_symbol_touches(commit_id);

CREATE TABLE IF NOT EXISTS git_cochange_edges (
  id TEXT PRIMARY KEY,
  path TEXT NOT NULL,
  cochanged_path TEXT NOT NULL,
  commit_count INTEGER NOT NULL,
  recency_weight REAL NOT NULL,
  last_changed_at TEXT,
  json TEXT NOT NULL,
  UNIQUE(path, cochanged_path)
);
CREATE INDEX IF NOT EXISTS idx_git_cochange_edges_path
  ON git_cochange_edges(path, recency_weight DESC, commit_count DESC);

CREATE TABLE IF NOT EXISTS git_review_events (
  id TEXT PRIMARY KEY,
  commit_id TEXT,
  path TEXT,
  reviewer_identity TEXT NOT NULL,
  observed_at TEXT NOT NULL,
  json TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_git_review_events_path
  ON git_review_events(path, observed_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_review_events_commit
  ON git_review_events(commit_id, observed_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_review_events_reviewer
  ON git_review_events(reviewer_identity, observed_at DESC);
"#;
97
98pub struct SqliteStore {
99    path: PathBuf,
100    connection: Mutex<Connection>,
101}
102
103impl SqliteStore {
104    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
105        let path = path.as_ref().to_path_buf();
106        if let Some(parent) = path.parent() {
107            std::fs::create_dir_all(parent)?;
108        }
109        let connection = Connection::open_with_flags(
110            &path,
111            rusqlite::OpenFlags::SQLITE_OPEN_READ_WRITE
112                | rusqlite::OpenFlags::SQLITE_OPEN_CREATE
113                | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
114        )
115        .map_err(storage_err)?;
116        let store = Self {
117            path,
118            connection: Mutex::new(connection),
119        };
120        store.initialize()?;
121        Ok(store)
122    }
123
124    pub fn path(&self) -> &Path {
125        &self.path
126    }
127}
128
129impl MetadataStore for SqliteStore {
130    fn initialize(&self) -> Result<()> {
131        let mut conn = self
132            .connection
133            .lock()
134            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
135        ensure_supported_sqlite_schema(&conn)?;
136        conn.execute_batch(
137            r#"
138            PRAGMA journal_mode = WAL;
139            PRAGMA foreign_keys = ON;
140            CREATE TABLE IF NOT EXISTS manifests (
141              id INTEGER PRIMARY KEY CHECK (id = 1),
142              json TEXT NOT NULL
143            );
144            CREATE TABLE IF NOT EXISTS files (
145              id TEXT PRIMARY KEY,
146              path TEXT NOT NULL UNIQUE,
147              json TEXT NOT NULL
148            );
149            CREATE TABLE IF NOT EXISTS symbols (
150              id TEXT PRIMARY KEY,
151              name TEXT NOT NULL,
152              qualified_name TEXT NOT NULL,
153              file_id TEXT NOT NULL,
154              json TEXT NOT NULL
155            );
156            CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
157            CREATE TABLE IF NOT EXISTS chunks (
158              id TEXT PRIMARY KEY,
159              file_id TEXT NOT NULL,
160              start_line INTEGER NOT NULL,
161              end_line INTEGER NOT NULL,
162              text TEXT NOT NULL,
163              json TEXT NOT NULL
164            );
165            CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_id);
166            CREATE TABLE IF NOT EXISTS tests (
167              id TEXT PRIMARY KEY,
168              file_id TEXT NOT NULL,
169              json TEXT NOT NULL
170            );
171            CREATE INDEX IF NOT EXISTS idx_tests_file ON tests(file_id);
172            CREATE TABLE IF NOT EXISTS imports (
173              id TEXT PRIMARY KEY,
174              file_id TEXT NOT NULL,
175              imported TEXT NOT NULL,
176              json TEXT NOT NULL
177            );
178            CREATE INDEX IF NOT EXISTS idx_imports_file ON imports(file_id);
179            CREATE TABLE IF NOT EXISTS occurrences (
180              id TEXT PRIMARY KEY,
181              symbol_id TEXT NOT NULL,
182              file_id TEXT NOT NULL,
183              is_definition INTEGER NOT NULL,
184              json TEXT NOT NULL
185            );
186            CREATE INDEX IF NOT EXISTS idx_occurrences_symbol ON occurrences(symbol_id);
187            CREATE INDEX IF NOT EXISTS idx_occurrences_file ON occurrences(file_id);
188            CREATE TABLE IF NOT EXISTS analysis_facts (
189              id TEXT PRIMARY KEY,
190              file_id TEXT NOT NULL,
191              source_type TEXT NOT NULL,
192              target TEXT NOT NULL,
193              json TEXT NOT NULL
194            );
195            CREATE INDEX IF NOT EXISTS idx_analysis_facts_file ON analysis_facts(file_id);
196            CREATE INDEX IF NOT EXISTS idx_analysis_facts_source ON analysis_facts(source_type);
197            CREATE TABLE IF NOT EXISTS vector_targets (
198              id TEXT PRIMARY KEY,
199              file_id TEXT NOT NULL,
200              target_kind TEXT NOT NULL,
201              content_hash TEXT NOT NULL,
202              vector_id INTEGER NOT NULL,
203              model TEXT NOT NULL,
204              dimensions INTEGER NOT NULL,
205              json TEXT NOT NULL
206            );
207            CREATE INDEX IF NOT EXISTS idx_vector_targets_file ON vector_targets(file_id);
208            CREATE TABLE IF NOT EXISTS embedding_cache (
209              cache_key TEXT PRIMARY KEY,
210              target_id TEXT NOT NULL,
211              content_hash TEXT NOT NULL,
212              model TEXT NOT NULL,
213              dimensions INTEGER NOT NULL,
214              json TEXT NOT NULL
215            );
216            CREATE TABLE IF NOT EXISTS semantic_index_runs (
217              id TEXT PRIMARY KEY,
218              status TEXT NOT NULL,
219              model TEXT NOT NULL,
220              dimensions INTEGER NOT NULL,
221              vector_count INTEGER NOT NULL,
222              created_at TEXT NOT NULL,
223              json TEXT NOT NULL
224            );
225            CREATE TABLE IF NOT EXISTS semantic_coverage (
226              id TEXT PRIMARY KEY,
227              target_kind TEXT NOT NULL,
228              indexed_count INTEGER NOT NULL,
229              stale_count INTEGER NOT NULL,
230              failed_count INTEGER NOT NULL,
231              json TEXT NOT NULL
232            );
233            CREATE TABLE IF NOT EXISTS graph_nodes (
234              id TEXT PRIMARY KEY,
235              label TEXT NOT NULL,
236              node_type TEXT DEFAULT '',
237              file_id TEXT DEFAULT '',
238              symbol_id TEXT DEFAULT '',
239              json TEXT NOT NULL
240            );
241            CREATE TABLE IF NOT EXISTS graph_edges (
242              id TEXT PRIMARY KEY,
243              from_id TEXT NOT NULL,
244              to_id TEXT NOT NULL,
245              edge_type TEXT NOT NULL,
246              confidence TEXT DEFAULT '',
247              source_type TEXT DEFAULT '',
248              source_file TEXT DEFAULT '',
249              json TEXT NOT NULL
250            );
251            CREATE INDEX IF NOT EXISTS idx_graph_edges_from ON graph_edges(from_id);
252            CREATE INDEX IF NOT EXISTS idx_graph_edges_to ON graph_edges(to_id);
253            "#,
254        )
255        .map_err(storage_err)?;
256        migrate_history_schema(&mut conn)?;
257        migrate_graph_schema(&mut conn)?;
258        Ok(())
259    }
260
261    fn put_manifest(&self, manifest: &IndexManifest) -> Result<()> {
262        let conn = self
263            .connection
264            .lock()
265            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
266        let json = serde_json::to_string(manifest)?;
267        conn.execute(
268            "INSERT INTO manifests(id, json) VALUES(1, ?1) ON CONFLICT(id) DO UPDATE SET json = excluded.json",
269            params![json],
270        )
271        .map_err(storage_err)?;
272        Ok(())
273    }
274
275    fn manifest(&self) -> Result<Option<IndexManifest>> {
276        let conn = self
277            .connection
278            .lock()
279            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
280        let raw: Option<String> = conn
281            .query_row("SELECT json FROM manifests WHERE id = 1", [], |row| {
282                row.get(0)
283            })
284            .optional()
285            .map_err(storage_err)?;
286        raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
287            .transpose()
288    }
289
290    fn replace_index(&self, data: IndexData<'_>) -> Result<()> {
291        let mut conn = self
292            .connection
293            .lock()
294            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
295        let tx = conn.transaction().map_err(storage_err)?;
296        tx.execute("DELETE FROM occurrences", [])
297            .map_err(storage_err)?;
298        tx.execute("DELETE FROM analysis_facts", [])
299            .map_err(storage_err)?;
300        tx.execute("DELETE FROM imports", []).map_err(storage_err)?;
301        tx.execute("DELETE FROM tests", []).map_err(storage_err)?;
302        tx.execute("DELETE FROM chunks", []).map_err(storage_err)?;
303        tx.execute("DELETE FROM symbols", []).map_err(storage_err)?;
304        tx.execute("DELETE FROM files", []).map_err(storage_err)?;
305        tx.execute("DELETE FROM manifests", [])
306            .map_err(storage_err)?;
307        tx.execute(
308            "INSERT INTO manifests(id, json) VALUES(1, ?1)",
309            params![serde_json::to_string(data.manifest)?],
310        )
311        .map_err(storage_err)?;
312        insert_index_rows(
313            &tx,
314            IndexRows {
315                files: data.files,
316                symbols: data.symbols,
317                chunks: data.chunks,
318                tests: data.tests,
319                imports: data.imports,
320                occurrences: data.occurrences,
321                analysis_facts: data.analysis_facts,
322            },
323        )?;
324        tx.commit().map_err(storage_err)?;
325        Ok(())
326    }
327
328    fn replace_files_index(&self, update: PartialIndexUpdate<'_>) -> Result<()> {
329        let mut conn = self
330            .connection
331            .lock()
332            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
333        let tx = conn.transaction().map_err(storage_err)?;
334        let affected_file_ids = update
335            .changed_files
336            .iter()
337            .map(|file| file.id.clone())
338            .chain(update.deleted_file_ids.iter().cloned())
339            .collect::<BTreeSet<_>>();
340        let mut affected_file_paths = update
341            .changed_files
342            .iter()
343            .map(|file| file.path.to_string_lossy().to_string())
344            .collect::<BTreeSet<_>>();
345        for file_id in &affected_file_ids {
346            let path: Option<String> = tx
347                .query_row(
348                    "SELECT path FROM files WHERE id = ?1",
349                    params![&file_id.0],
350                    |row| row.get(0),
351                )
352                .optional()
353                .map_err(storage_err)?;
354            if let Some(path) = path {
355                affected_file_paths.insert(path);
356            }
357        }
358
359        let mut affected_symbol_ids = update
360            .symbols
361            .iter()
362            .map(|symbol| symbol.id.clone())
363            .collect::<BTreeSet<_>>();
364        for file_id in &affected_file_ids {
365            let mut stmt = tx
366                .prepare("SELECT id FROM symbols WHERE file_id = ?1")
367                .map_err(storage_err)?;
368            let rows = stmt
369                .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
370                .map_err(storage_err)?;
371            for row in rows {
372                affected_symbol_ids.insert(SymbolId::new(row.map_err(storage_err)?));
373            }
374        }
375
376        let mut affected_node_ids = update
377            .graph_nodes
378            .iter()
379            .map(|node| node.id.0.clone())
380            .collect::<BTreeSet<_>>();
381        for file_id in &affected_file_ids {
382            let mut stmt = tx
383                .prepare("SELECT id FROM graph_nodes WHERE file_id = ?1")
384                .map_err(storage_err)?;
385            let rows = stmt
386                .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
387                .map_err(storage_err)?;
388            for row in rows {
389                affected_node_ids.insert(row.map_err(storage_err)?);
390            }
391        }
392        for symbol_id in &affected_symbol_ids {
393            let mut stmt = tx
394                .prepare("SELECT id FROM graph_nodes WHERE symbol_id = ?1")
395                .map_err(storage_err)?;
396            let rows = stmt
397                .query_map(params![&symbol_id.0], |row| row.get::<_, String>(0))
398                .map_err(storage_err)?;
399            for row in rows {
400                affected_node_ids.insert(row.map_err(storage_err)?);
401            }
402        }
403
404        tx.execute(
405            "INSERT INTO manifests(id, json) VALUES(1, ?1)
406             ON CONFLICT(id) DO UPDATE SET json = excluded.json",
407            params![serde_json::to_string(update.manifest)?],
408        )
409        .map_err(storage_err)?;
410
411        for node_id in &affected_node_ids {
412            tx.execute(
413                "DELETE FROM graph_edges WHERE from_id = ?1 OR to_id = ?1",
414                params![node_id],
415            )
416            .map_err(storage_err)?;
417        }
418        for path in &affected_file_paths {
419            tx.execute(
420                "DELETE FROM graph_edges WHERE source_file = ?1",
421                params![path],
422            )
423            .map_err(storage_err)?;
424        }
425        for node_id in &affected_node_ids {
426            tx.execute("DELETE FROM graph_nodes WHERE id = ?1", params![node_id])
427                .map_err(storage_err)?;
428        }
429        for file_id in &affected_file_ids {
430            tx.execute(
431                "DELETE FROM graph_nodes WHERE file_id = ?1",
432                params![&file_id.0],
433            )
434            .map_err(storage_err)?;
435        }
436        for symbol_id in &affected_symbol_ids {
437            tx.execute(
438                "DELETE FROM graph_nodes WHERE symbol_id = ?1",
439                params![&symbol_id.0],
440            )
441            .map_err(storage_err)?;
442        }
443
444        for symbol_id in &affected_symbol_ids {
445            tx.execute(
446                "DELETE FROM occurrences WHERE symbol_id = ?1",
447                params![&symbol_id.0],
448            )
449            .map_err(storage_err)?;
450        }
451        for file_id in &affected_file_ids {
452            tx.execute(
453                "DELETE FROM occurrences WHERE file_id = ?1",
454                params![&file_id.0],
455            )
456            .map_err(storage_err)?;
457            tx.execute(
458                "DELETE FROM analysis_facts WHERE file_id = ?1",
459                params![&file_id.0],
460            )
461            .map_err(storage_err)?;
462            tx.execute(
463                "DELETE FROM imports WHERE file_id = ?1",
464                params![&file_id.0],
465            )
466            .map_err(storage_err)?;
467            tx.execute("DELETE FROM tests WHERE file_id = ?1", params![&file_id.0])
468                .map_err(storage_err)?;
469            tx.execute("DELETE FROM chunks WHERE file_id = ?1", params![&file_id.0])
470                .map_err(storage_err)?;
471            tx.execute(
472                "DELETE FROM symbols WHERE file_id = ?1",
473                params![&file_id.0],
474            )
475            .map_err(storage_err)?;
476            tx.execute("DELETE FROM files WHERE id = ?1", params![&file_id.0])
477                .map_err(storage_err)?;
478        }
479
480        insert_index_rows(
481            &tx,
482            IndexRows {
483                files: update.changed_files,
484                symbols: update.symbols,
485                chunks: update.chunks,
486                tests: update.tests,
487                imports: update.imports,
488                occurrences: update.occurrences,
489                analysis_facts: update.analysis_facts,
490            },
491        )?;
492        insert_graph_rows(&tx, update.graph_nodes, update.graph_edges)?;
493        tx.commit().map_err(storage_err)?;
494        Ok(())
495    }
496
497    fn list_files(&self, limit: usize, offset: usize) -> Result<Vec<File>> {
498        let conn = self
499            .connection
500            .lock()
501            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
502        let mut stmt = conn
503            .prepare("SELECT json FROM files ORDER BY path LIMIT ?1 OFFSET ?2")
504            .map_err(storage_err)?;
505        let rows = stmt
506            .query_map(params![limit as i64, offset as i64], |row| {
507                row.get::<_, String>(0)
508            })
509            .map_err(storage_err)?;
510        collect_json(rows)
511    }
512
513    fn get_file_by_path(&self, path: &Path) -> Result<Option<File>> {
514        let conn = self
515            .connection
516            .lock()
517            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
518        let raw: Option<String> = conn
519            .query_row(
520                "SELECT json FROM files WHERE path = ?1",
521                params![path.to_string_lossy().as_ref()],
522                |row| row.get(0),
523            )
524            .optional()
525            .map_err(storage_err)?;
526        raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
527            .transpose()
528    }
529
530    fn list_symbols(
531        &self,
532        query: Option<&str>,
533        limit: usize,
534        offset: usize,
535    ) -> Result<Vec<Symbol>> {
536        let conn = self
537            .connection
538            .lock()
539            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
540        let pattern = format!("%{}%", query.unwrap_or_default());
541        let mut stmt = conn
542            .prepare(
543                "SELECT json FROM symbols WHERE (?1 = '%%' OR name LIKE ?1 COLLATE NOCASE OR qualified_name LIKE ?1 COLLATE NOCASE) ORDER BY qualified_name LIMIT ?2 OFFSET ?3",
544            )
545            .map_err(storage_err)?;
546        let rows = stmt
547            .query_map(params![pattern, limit as i64, offset as i64], |row| {
548                row.get::<_, String>(0)
549            })
550            .map_err(storage_err)?;
551        collect_json(rows)
552    }
553
554    fn symbol_by_id(&self, id: &SymbolId) -> Result<Option<Symbol>> {
555        let conn = self
556            .connection
557            .lock()
558            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
559        let raw: Option<String> = conn
560            .query_row(
561                "SELECT json FROM symbols WHERE id = ?1",
562                params![&id.0],
563                |row| row.get(0),
564            )
565            .optional()
566            .map_err(storage_err)?;
567        raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
568            .transpose()
569    }
570
571    fn chunks_for_file(&self, file_id: &FileId) -> Result<Vec<CodeChunk>> {
572        let conn = self
573            .connection
574            .lock()
575            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
576        let mut stmt = conn
577            .prepare("SELECT json FROM chunks WHERE file_id = ?1 ORDER BY start_line")
578            .map_err(storage_err)?;
579        let rows = stmt
580            .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
581            .map_err(storage_err)?;
582        collect_json(rows)
583    }
584
585    fn all_chunks(&self) -> Result<Vec<CodeChunk>> {
586        let conn = self
587            .connection
588            .lock()
589            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
590        let mut stmt = conn
591            .prepare("SELECT json FROM chunks ORDER BY file_id, start_line")
592            .map_err(storage_err)?;
593        let rows = stmt
594            .query_map([], |row| row.get::<_, String>(0))
595            .map_err(storage_err)?;
596        collect_json(rows)
597    }
598
599    fn tests(&self) -> Result<Vec<TestTarget>> {
600        let conn = self
601            .connection
602            .lock()
603            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
604        let mut stmt = conn
605            .prepare("SELECT json FROM tests ORDER BY file_id")
606            .map_err(storage_err)?;
607        let rows = stmt
608            .query_map([], |row| row.get::<_, String>(0))
609            .map_err(storage_err)?;
610        collect_json(rows)
611    }
612
613    fn imports(&self) -> Result<Vec<Import>> {
614        let conn = self
615            .connection
616            .lock()
617            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
618        let mut stmt = conn
619            .prepare("SELECT json FROM imports ORDER BY file_id")
620            .map_err(storage_err)?;
621        let rows = stmt
622            .query_map([], |row| row.get::<_, String>(0))
623            .map_err(storage_err)?;
624        collect_json(rows)
625    }
626
627    fn analysis_facts(
628        &self,
629        source_type: Option<EvidenceSourceType>,
630        limit: usize,
631    ) -> Result<Vec<AnalysisFact>> {
632        let conn = self
633            .connection
634            .lock()
635            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
636        let limit = limit.min(i64::MAX as usize) as i64;
637        let rows = if let Some(source_type) = source_type {
638            let mut stmt = conn
639                .prepare(
640                    "SELECT json FROM analysis_facts WHERE source_type = ?1 ORDER BY file_id, target LIMIT ?2",
641                )
642                .map_err(storage_err)?;
643            let rows = stmt
644                .query_map(params![source_type_name(&source_type), limit], |row| {
645                    row.get::<_, String>(0)
646                })
647                .map_err(storage_err)?;
648            collect_json(rows)?
649        } else {
650            let mut stmt = conn
651                .prepare("SELECT json FROM analysis_facts ORDER BY file_id, target LIMIT ?1")
652                .map_err(storage_err)?;
653            let rows = stmt
654                .query_map(params![limit], |row| row.get::<_, String>(0))
655                .map_err(storage_err)?;
656            collect_json(rows)?
657        };
658        Ok(rows)
659    }
660
661    fn references_for_symbol(&self, id: &SymbolId, limit: usize) -> Result<Vec<SymbolOccurrence>> {
662        let conn = self
663            .connection
664            .lock()
665            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
666        let mut stmt = conn
667            .prepare(
668                "SELECT json FROM occurrences WHERE symbol_id = ?1 AND is_definition = 0 ORDER BY file_id LIMIT ?2",
669            )
670            .map_err(storage_err)?;
671        let rows = stmt
672            .query_map(params![&id.0, limit as i64], |row| row.get::<_, String>(0))
673            .map_err(storage_err)?;
674        collect_json(rows)
675    }
676
677    fn occurrences_for_file(&self, file_id: &FileId) -> Result<Vec<SymbolOccurrence>> {
678        let conn = self
679            .connection
680            .lock()
681            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
682        let mut stmt = conn
683            .prepare("SELECT json FROM occurrences WHERE file_id = ?1 ORDER BY symbol_id")
684            .map_err(storage_err)?;
685        let rows = stmt
686            .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
687            .map_err(storage_err)?;
688        collect_json(rows)
689    }
690
691    fn symbols_for_file(&self, file_id: &FileId) -> Result<Vec<Symbol>> {
692        let conn = self
693            .connection
694            .lock()
695            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
696        let mut stmt = conn
697            .prepare("SELECT json FROM symbols WHERE file_id = ?1 ORDER BY name")
698            .map_err(storage_err)?;
699        let rows = stmt
700            .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
701            .map_err(storage_err)?;
702        collect_json(rows)
703    }
704
705    fn find_chunks_containing(&self, query: &str, limit: usize) -> Result<Vec<CodeChunk>> {
706        let conn = self
707            .connection
708            .lock()
709            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
710        let pattern = format!("%{}%", query);
711        let mut stmt = conn
712            .prepare("SELECT json FROM chunks WHERE text LIKE ?1 LIMIT ?2")
713            .map_err(storage_err)?;
714        let rows = stmt
715            .query_map(params![pattern, limit as i64], |row| {
716                row.get::<_, String>(0)
717            })
718            .map_err(storage_err)?;
719        collect_json(rows)
720    }
721
722    fn find_files_by_path_pattern(&self, pattern: &str) -> Result<Vec<File>> {
723        let conn = self
724            .connection
725            .lock()
726            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
727        let match_pat = format!("%{}%", pattern);
728        let mut stmt = conn
729            .prepare("SELECT json FROM files WHERE path LIKE ?1 COLLATE NOCASE")
730            .map_err(storage_err)?;
731        let rows = stmt
732            .query_map(params![match_pat], |row| row.get::<_, String>(0))
733            .map_err(storage_err)?;
734        collect_json(rows)
735    }
736
737    fn tests_for_files(&self, file_ids: &[FileId]) -> Result<Vec<TestTarget>> {
738        if file_ids.is_empty() {
739            return Ok(Vec::new());
740        }
741        let conn = self
742            .connection
743            .lock()
744            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
745
746        let placeholders = file_ids.iter().map(|_| "?").collect::<Vec<_>>().join(",");
747        let sql = format!("SELECT json FROM tests WHERE file_id IN ({})", placeholders);
748        let mut stmt = conn.prepare(&sql).map_err(storage_err)?;
749
750        let params = rusqlite::params_from_iter(file_ids.iter().map(|id| &id.0));
751        let rows = stmt
752            .query_map(params, |row| row.get::<_, String>(0))
753            .map_err(storage_err)?;
754        collect_json(rows)
755    }
756}
757
758impl HistoryStore for SqliteStore {
759    fn put_history_snapshot(&self, snapshot: &HistorySnapshot) -> Result<()> {
760        validate_history_snapshot(snapshot)?;
761        let mut conn = self
762            .connection
763            .lock()
764            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
765        let tx = conn.transaction().map_err(storage_err)?;
766
767        tx.execute("DELETE FROM git_review_events", [])
768            .map_err(storage_err)?;
769        tx.execute("DELETE FROM git_cochange_edges", [])
770            .map_err(storage_err)?;
771        tx.execute("DELETE FROM git_symbol_touches", [])
772            .map_err(storage_err)?;
773        tx.execute("DELETE FROM git_file_touches", [])
774            .map_err(storage_err)?;
775        tx.execute("DELETE FROM git_commits", [])
776            .map_err(storage_err)?;
777
778        for commit in &snapshot.commits {
779            tx.execute(
780                "INSERT INTO git_commits(id, authored_at, committed_at, author_email, json) VALUES(?1, ?2, ?3, ?4, ?5)",
781                params![
782                    &commit.id.0,
783                    commit.authored_at.to_rfc3339(),
784                    commit.committed_at.to_rfc3339(),
785                    commit.author.email.as_deref(),
786                    serde_json::to_string(commit)?,
787                ],
788            )
789            .map_err(storage_err)?;
790        }
791        for touch in &snapshot.file_touches {
792            tx.execute(
793                "INSERT INTO git_file_touches(id, commit_id, path, previous_path, touched_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6)",
794                params![
795                    &touch.id.0,
796                    &touch.commit_id.0,
797                    history_path(&touch.path)?,
798                    touch
799                        .previous_path
800                        .as_deref()
801                        .map(history_path)
802                        .transpose()?,
803                    touch.touched_at.to_rfc3339(),
804                    serde_json::to_string(touch)?,
805                ],
806            )
807            .map_err(storage_err)?;
808        }
809        for touch in &snapshot.symbol_touches {
810            tx.execute(
811                "INSERT INTO git_symbol_touches(id, commit_id, symbol_id, qualified_name, file_path, touched_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7)",
812                params![
813                    &touch.id.0,
814                    &touch.commit_id.0,
815                    touch.symbol_id.as_ref().map(|id| id.0.as_str()),
816                    &touch.qualified_name,
817                    history_path(&touch.file_path)?,
818                    touch.touched_at.to_rfc3339(),
819                    serde_json::to_string(touch)?,
820                ],
821            )
822            .map_err(storage_err)?;
823        }
824        for edge in &snapshot.cochange_edges {
825            tx.execute(
826                "INSERT INTO git_cochange_edges(id, path, cochanged_path, commit_count, recency_weight, last_changed_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7)",
827                params![
828                    &edge.id.0,
829                    history_path(&edge.path)?,
830                    history_path(&edge.cochanged_path)?,
831                    usize_to_i64(edge.commit_count, "co-change commit count")?,
832                    edge.recency_weight,
833                    edge.last_changed_at.map(|value| value.to_rfc3339()),
834                    serde_json::to_string(edge)?,
835                ],
836            )
837            .map_err(storage_err)?;
838        }
839        for evidence in &snapshot.reviewer_evidence {
840            let reviewer_identity = evidence
841                .reviewer
842                .email
843                .as_deref()
844                .unwrap_or(&evidence.reviewer.name);
845            tx.execute(
846                "INSERT INTO git_review_events(id, commit_id, path, reviewer_identity, observed_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6)",
847                params![
848                    &evidence.id.0,
849                    evidence.commit_id.as_ref().map(|id| id.0.as_str()),
850                    evidence.path.as_deref().map(history_path).transpose()?,
851                    reviewer_identity,
852                    evidence.observed_at.to_rfc3339(),
853                    serde_json::to_string(evidence)?,
854                ],
855            )
856            .map_err(storage_err)?;
857        }
858
859        tx.commit().map_err(storage_err)?;
860        Ok(())
861    }
862
863    fn history_for_file(&self, path: &Path, limit: usize) -> Result<HistorySummary> {
864        let normalized_path = history_path(path)?;
865        if limit == 0 {
866            return Ok(HistorySummary {
867                path: path.to_path_buf(),
868                recent_commits: Vec::new(),
869                file_touches: Vec::new(),
870                symbol_touches: Vec::new(),
871                cochange_neighbors: Vec::new(),
872                reviewer_evidence: Vec::new(),
873                truncated: false,
874                uncertainty: vec!["history query limit is zero".into()],
875            });
876        }
877
878        let conn = self
879            .connection
880            .lock()
881            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
882        let query_limit = history_query_limit(limit);
883
884        let mut commit_stmt = conn
885            .prepare(
886                "SELECT c.json FROM git_commits c
887                 WHERE EXISTS (
888                   SELECT 1 FROM git_file_touches t
889                   WHERE t.commit_id = c.id AND (t.path = ?1 OR t.previous_path = ?1)
890                 )
891                 ORDER BY c.committed_at DESC, c.id
892                 LIMIT ?2",
893            )
894            .map_err(storage_err)?;
895        let commit_rows = commit_stmt
896            .query_map(params![&normalized_path, query_limit], |row| {
897                row.get::<_, String>(0)
898            })
899            .map_err(storage_err)?;
900        let (recent_commits, commits_truncated) = collect_limited_json(commit_rows, limit)?;
901
902        let mut file_touch_stmt = conn
903            .prepare(
904                "SELECT json FROM git_file_touches
905                 WHERE path = ?1 OR previous_path = ?1
906                 ORDER BY touched_at DESC, id
907                 LIMIT ?2",
908            )
909            .map_err(storage_err)?;
910        let file_touch_rows = file_touch_stmt
911            .query_map(params![&normalized_path, query_limit], |row| {
912                row.get::<_, String>(0)
913            })
914            .map_err(storage_err)?;
915        let (file_touches, file_touches_truncated) = collect_limited_json(file_touch_rows, limit)?;
916
917        let mut symbol_touch_stmt = conn
918            .prepare(
919                "SELECT json FROM git_symbol_touches
920                 WHERE file_path = ?1
921                 ORDER BY touched_at DESC, id
922                 LIMIT ?2",
923            )
924            .map_err(storage_err)?;
925        let symbol_touch_rows = symbol_touch_stmt
926            .query_map(params![&normalized_path, query_limit], |row| {
927                row.get::<_, String>(0)
928            })
929            .map_err(storage_err)?;
930        let (symbol_touches, symbol_touches_truncated) =
931            collect_limited_json(symbol_touch_rows, limit)?;
932
933        let mut cochange_stmt = conn
934            .prepare(
935                "SELECT json FROM git_cochange_edges
936                 WHERE path = ?1
937                 ORDER BY recency_weight DESC, commit_count DESC, cochanged_path
938                 LIMIT ?2",
939            )
940            .map_err(storage_err)?;
941        let cochange_rows = cochange_stmt
942            .query_map(params![&normalized_path, query_limit], |row| {
943                row.get::<_, String>(0)
944            })
945            .map_err(storage_err)?;
946        let (cochange_neighbors, cochange_truncated) = collect_limited_json(cochange_rows, limit)?;
947
948        let mut reviewer_stmt = conn
949            .prepare(
950                "SELECT e.json FROM git_review_events e
951                 WHERE e.path = ?1
952                    OR (
953                      e.path IS NULL
954                      AND e.commit_id IN (
955                        SELECT t.commit_id FROM git_file_touches t
956                        WHERE t.path = ?1 OR t.previous_path = ?1
957                      )
958                    )
959                 ORDER BY e.observed_at DESC, e.id
960                 LIMIT ?2",
961            )
962            .map_err(storage_err)?;
963        let reviewer_rows = reviewer_stmt
964            .query_map(params![&normalized_path, query_limit], |row| {
965                row.get::<_, String>(0)
966            })
967            .map_err(storage_err)?;
968        let (reviewer_evidence, reviewers_truncated) = collect_limited_json(reviewer_rows, limit)?;
969
970        let truncated = commits_truncated
971            || file_touches_truncated
972            || symbol_touches_truncated
973            || cochange_truncated
974            || reviewers_truncated;
975        let mut uncertainty = Vec::new();
976        if recent_commits.is_empty()
977            && file_touches.is_empty()
978            && symbol_touches.is_empty()
979            && cochange_neighbors.is_empty()
980            && reviewer_evidence.is_empty()
981        {
982            uncertainty.push("no persisted history evidence is available for this path".into());
983        } else {
984            if symbol_touches.is_empty() {
985                uncertainty.push("no symbol-level history is stored for this path".into());
986            }
987            if reviewer_evidence.is_empty() {
988                uncertainty.push("no reviewer or owner evidence is stored for this path".into());
989            }
990        }
991        if truncated {
992            uncertainty.push(format!(
993                "history results are truncated to {limit} records per category"
994            ));
995        }
996
997        Ok(HistorySummary {
998            path: path.to_path_buf(),
999            recent_commits,
1000            file_touches,
1001            symbol_touches,
1002            cochange_neighbors,
1003            reviewer_evidence,
1004            truncated,
1005            uncertainty,
1006        })
1007    }
1008
1009    fn provenance_for_path(&self, path: &Path, limit: usize) -> Result<FileProvenance> {
1010        let normalized_path = history_path(path)?;
1011        if limit == 0 {
1012            return Ok(FileProvenance {
1013                path: path.to_path_buf(),
1014                first_seen: None,
1015                last_touched: None,
1016                recent_touches: Vec::new(),
1017                confidence: Confidence::Low,
1018                truncated: false,
1019                uncertainty: vec!["provenance query limit is zero".into()],
1020            });
1021        }
1022
1023        let conn = self
1024            .connection
1025            .lock()
1026            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1027        let query_limit = history_query_limit(limit);
1028        let aliases = "
1029            WITH RECURSIVE aliases(path) AS (
1030              SELECT ?1
1031              UNION
1032              SELECT t.previous_path
1033              FROM git_file_touches t JOIN aliases a ON t.path = a.path
1034              WHERE t.previous_path IS NOT NULL
1035              UNION
1036              SELECT t.path
1037              FROM git_file_touches t JOIN aliases a ON t.previous_path = a.path
1038            )";
1039        let recent_sql = format!(
1040            "{aliases}
1041             SELECT DISTINCT t.json, c.json
1042             FROM git_file_touches t
1043             JOIN git_commits c ON c.id = t.commit_id
1044             WHERE t.path IN aliases OR t.previous_path IN aliases
1045             ORDER BY t.touched_at DESC, t.id
1046             LIMIT ?2"
1047        );
1048        let mut recent_stmt = conn.prepare(&recent_sql).map_err(storage_err)?;
1049        let rows = recent_stmt
1050            .query_map(params![&normalized_path, query_limit], |row| {
1051                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1052            })
1053            .map_err(storage_err)?;
1054        let mut recent_touches = collect_provenance_rows(rows, file_provenance_touch)?;
1055        let truncated = recent_touches.len() > limit;
1056        recent_touches.truncate(limit);
1057
1058        let first_sql = format!(
1059            "{aliases}
1060             SELECT DISTINCT t.json, c.json
1061             FROM git_file_touches t
1062             JOIN git_commits c ON c.id = t.commit_id
1063             WHERE t.path IN aliases OR t.previous_path IN aliases
1064             ORDER BY t.touched_at ASC, t.id
1065             LIMIT 1"
1066        );
1067        let first_seen = conn
1068            .query_row(&first_sql, params![&normalized_path], |row| {
1069                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1070            })
1071            .optional()
1072            .map_err(storage_err)?
1073            .map(|(touch, commit)| file_provenance_touch(&touch, &commit))
1074            .transpose()?;
1075        let last_touched = recent_touches.first().cloned();
1076        let mut uncertainty = Vec::new();
1077        if first_seen.is_none() {
1078            uncertainty.push("no persisted commit provenance is available for this path".into());
1079        } else if first_seen
1080            .as_ref()
1081            .is_some_and(|touch| touch.change_kind != open_kioku_core::GitChangeKind::Added)
1082        {
1083            uncertainty.push(
1084                "first_seen is the earliest persisted touch in the configured local history window, not a proven file-creation commit"
1085                    .into(),
1086            );
1087        }
1088        if truncated {
1089            uncertainty.push(format!(
1090                "recent provenance is truncated to {limit} touch records"
1091            ));
1092        }
1093
1094        let confidence = if uncertainty.is_empty() {
1095            Confidence::Exact
1096        } else if last_touched.is_some() {
1097            Confidence::High
1098        } else {
1099            Confidence::Low
1100        };
1101        Ok(FileProvenance {
1102            path: path.to_path_buf(),
1103            first_seen,
1104            last_touched,
1105            recent_touches,
1106            confidence,
1107            truncated,
1108            uncertainty,
1109        })
1110    }
1111
1112    fn provenance_for_symbol(
1113        &self,
1114        symbol_id: &SymbolId,
1115        limit: usize,
1116    ) -> Result<SymbolProvenance> {
1117        let conn = self
1118            .connection
1119            .lock()
1120            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1121        let symbol_json: Option<String> = conn
1122            .query_row(
1123                "SELECT json FROM symbols WHERE id = ?1",
1124                params![&symbol_id.0],
1125                |row| row.get(0),
1126            )
1127            .optional()
1128            .map_err(storage_err)?;
1129        let Some(symbol_json) = symbol_json else {
1130            return Err(OkError::SymbolNotFound(symbol_id.0.clone()));
1131        };
1132        let symbol: Symbol = serde_json::from_str(&symbol_json)?;
1133        let file_path: String = conn
1134            .query_row(
1135                "SELECT path FROM files WHERE id = ?1",
1136                params![&symbol.file_id.0],
1137                |row| row.get(0),
1138            )
1139            .map_err(storage_err)?;
1140        if limit == 0 {
1141            return Ok(SymbolProvenance {
1142                symbol_id: symbol.id,
1143                qualified_name: symbol.qualified_name,
1144                file_path: PathBuf::from(file_path),
1145                range: symbol.range,
1146                first_seen: None,
1147                last_touched: None,
1148                recent_touches: Vec::new(),
1149                confidence: Confidence::Low,
1150                truncated: false,
1151                uncertainty: vec!["provenance query limit is zero".into()],
1152            });
1153        }
1154
1155        let query_limit = history_query_limit(limit);
1156        let mut recent_stmt = conn
1157            .prepare(
1158                "SELECT t.json, c.json
1159                 FROM git_symbol_touches t
1160                 JOIN git_commits c ON c.id = t.commit_id
1161                 WHERE t.symbol_id = ?1
1162                 ORDER BY t.touched_at DESC, t.id
1163                 LIMIT ?2",
1164            )
1165            .map_err(storage_err)?;
1166        let rows = recent_stmt
1167            .query_map(params![&symbol_id.0, query_limit], |row| {
1168                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1169            })
1170            .map_err(storage_err)?;
1171        let mut recent_touches = collect_provenance_rows(rows, symbol_provenance_touch)?;
1172        let truncated = recent_touches.len() > limit;
1173        recent_touches.truncate(limit);
1174        let first_seen = conn
1175            .query_row(
1176                "SELECT t.json, c.json
1177                 FROM git_symbol_touches t
1178                 JOIN git_commits c ON c.id = t.commit_id
1179                 WHERE t.symbol_id = ?1
1180                 ORDER BY t.touched_at ASC, t.id
1181                 LIMIT 1",
1182                params![&symbol_id.0],
1183                |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)),
1184            )
1185            .optional()
1186            .map_err(storage_err)?
1187            .map(|(touch, commit)| symbol_provenance_touch(&touch, &commit))
1188            .transpose()?;
1189        let last_touched = recent_touches.first().cloned();
1190        let mut uncertainty = recent_touches
1191            .iter()
1192            .flat_map(|touch| touch.uncertainty.clone())
1193            .collect::<Vec<_>>();
1194        if let Some(first_seen) = &first_seen {
1195            uncertainty.extend(first_seen.uncertainty.clone());
1196            uncertainty.push(
1197                "first_seen is the earliest line-mapped touch in the configured local history window; it may not be the symbol-introduction commit"
1198                    .into(),
1199            );
1200        } else {
1201            uncertainty
1202                .push("no persisted line-level commit mapping is available for this symbol".into());
1203        }
1204        if symbol.range.is_none() {
1205            uncertainty.push(
1206                "the indexed symbol has no line range, so commit hunks cannot be mapped".into(),
1207            );
1208        }
1209        if truncated {
1210            uncertainty.push(format!(
1211                "recent provenance is truncated to {limit} touch records"
1212            ));
1213        }
1214        uncertainty.sort();
1215        uncertainty.dedup();
1216        let confidence = recent_touches
1217            .iter()
1218            .map(|touch| touch.confidence)
1219            .chain(first_seen.iter().map(|touch| touch.confidence))
1220            .reduce(lower_history_confidence)
1221            .unwrap_or(Confidence::Low);
1222
1223        Ok(SymbolProvenance {
1224            symbol_id: symbol.id,
1225            qualified_name: symbol.qualified_name,
1226            file_path: PathBuf::from(file_path),
1227            range: symbol.range,
1228            first_seen,
1229            last_touched,
1230            recent_touches,
1231            confidence,
1232            truncated,
1233            uncertainty,
1234        })
1235    }
1236
1237    fn cochange_neighbors(&self, path: &Path, limit: usize) -> Result<Vec<GitCochangeEdge>> {
1238        if limit == 0 {
1239            return Ok(Vec::new());
1240        }
1241        let normalized_path = history_path(path)?;
1242        let conn = self
1243            .connection
1244            .lock()
1245            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1246        let mut stmt = conn
1247            .prepare(
1248                "SELECT json FROM git_cochange_edges
1249                 WHERE path = ?1
1250                 ORDER BY recency_weight DESC, commit_count DESC, cochanged_path
1251                 LIMIT ?2",
1252            )
1253            .map_err(storage_err)?;
1254        let rows = stmt
1255            .query_map(
1256                params![normalized_path, limit.min(i64::MAX as usize) as i64],
1257                |row| row.get::<_, String>(0),
1258            )
1259            .map_err(storage_err)?;
1260        collect_json(rows)
1261    }
1262
1263    fn recent_commits(&self, limit: usize) -> Result<Vec<GitCommitRecord>> {
1264        if limit == 0 {
1265            return Ok(Vec::new());
1266        }
1267        let conn = self
1268            .connection
1269            .lock()
1270            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1271        let mut stmt = conn
1272            .prepare("SELECT json FROM git_commits ORDER BY committed_at DESC, id LIMIT ?1")
1273            .map_err(storage_err)?;
1274        let rows = stmt
1275            .query_map(params![limit.min(i64::MAX as usize) as i64], |row| {
1276                row.get::<_, String>(0)
1277            })
1278            .map_err(storage_err)?;
1279        collect_json(rows)
1280    }
1281}
1282
1283fn collect_provenance_rows<F>(
1284    rows: rusqlite::MappedRows<'_, F>,
1285    decode: fn(&str, &str) -> Result<ProvenanceTouch>,
1286) -> Result<Vec<ProvenanceTouch>>
1287where
1288    F: FnMut(&rusqlite::Row<'_>) -> rusqlite::Result<(String, String)>,
1289{
1290    let mut touches = Vec::new();
1291    for row in rows {
1292        let (touch, commit) = row.map_err(storage_err)?;
1293        touches.push(decode(&touch, &commit)?);
1294    }
1295    Ok(touches)
1296}
1297
1298fn file_provenance_touch(touch: &str, commit: &str) -> Result<ProvenanceTouch> {
1299    let touch: GitFileTouch = serde_json::from_str(touch)?;
1300    let commit: GitCommitRecord = serde_json::from_str(commit)?;
1301    Ok(ProvenanceTouch {
1302        commit,
1303        path: touch.path,
1304        previous_path: touch.previous_path,
1305        symbol_id: None,
1306        qualified_name: None,
1307        change_kind: touch.change_kind,
1308        line_ranges: Vec::new(),
1309        confidence: Confidence::Exact,
1310        uncertainty: Vec::new(),
1311    })
1312}
1313
1314fn symbol_provenance_touch(touch: &str, commit: &str) -> Result<ProvenanceTouch> {
1315    let touch: GitSymbolTouch = serde_json::from_str(touch)?;
1316    let commit: GitCommitRecord = serde_json::from_str(commit)?;
1317    Ok(ProvenanceTouch {
1318        commit,
1319        path: touch.file_path,
1320        previous_path: None,
1321        symbol_id: touch.symbol_id,
1322        qualified_name: Some(touch.qualified_name),
1323        change_kind: touch.change_kind,
1324        line_ranges: touch.line_ranges,
1325        confidence: touch.confidence,
1326        uncertainty: touch.uncertainty,
1327    })
1328}
1329
1330fn lower_history_confidence(left: Confidence, right: Confidence) -> Confidence {
1331    if history_confidence_rank(left) <= history_confidence_rank(right) {
1332        left
1333    } else {
1334        right
1335    }
1336}
1337
1338fn history_confidence_rank(confidence: Confidence) -> u8 {
1339    match confidence {
1340        Confidence::Low => 0,
1341        Confidence::Medium => 1,
1342        Confidence::High => 2,
1343        Confidence::Exact => 3,
1344    }
1345}
/// Limit that `clamp_limit` substitutes when a caller passes a limit of zero.
const DEFAULT_GRAPH_QUERY_LIMIT: usize = 100;
/// Upper bound that `clamp_limit` applies to any non-zero limit.
const MAX_GRAPH_QUERY_LIMIT: usize = 1_000;
1348
/// Borrowed batch of index records, grouped by kind, that
/// `insert_index_rows` writes to the corresponding tables.
struct IndexRows<'a> {
    /// Rows for the `files` table.
    files: &'a [File],
    /// Rows for the `symbols` table.
    symbols: &'a [Symbol],
    /// Rows for the `chunks` table.
    chunks: &'a [CodeChunk],
    /// Rows for the `tests` table.
    tests: &'a [TestTarget],
    /// Rows for the `imports` table.
    imports: &'a [Import],
    /// Rows for the `occurrences` table.
    occurrences: &'a [SymbolOccurrence],
    /// Rows for the `analysis_facts` table.
    analysis_facts: &'a [AnalysisFact],
}
1358
1359fn insert_index_rows(tx: &Transaction<'_>, rows: IndexRows<'_>) -> Result<()> {
1360    for file in rows.files {
1361        tx.execute(
1362            "INSERT INTO files(id, path, json) VALUES(?1, ?2, ?3)",
1363            params![
1364                &file.id.0,
1365                file.path.to_string_lossy().as_ref(),
1366                serde_json::to_string(file)?
1367            ],
1368        )
1369        .map_err(storage_err)?;
1370    }
1371    for symbol in rows.symbols {
1372        tx.execute(
1373            "INSERT INTO symbols(id, name, qualified_name, file_id, json) VALUES(?1, ?2, ?3, ?4, ?5)",
1374            params![
1375                &symbol.id.0,
1376                &symbol.name,
1377                &symbol.qualified_name,
1378                &symbol.file_id.0,
1379                serde_json::to_string(symbol)?
1380            ],
1381        )
1382        .map_err(storage_err)?;
1383    }
1384    for chunk in rows.chunks {
1385        tx.execute(
1386            "INSERT INTO chunks(id, file_id, start_line, end_line, text, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6)",
1387            params![
1388                &chunk.id,
1389                &chunk.file_id.0,
1390                chunk.range.start,
1391                chunk.range.end,
1392                &chunk.text,
1393                serde_json::to_string(chunk)?
1394            ],
1395        )
1396        .map_err(storage_err)?;
1397    }
1398    for test in rows.tests {
1399        tx.execute(
1400            "INSERT INTO tests(id, file_id, json) VALUES(?1, ?2, ?3) ON CONFLICT(id) DO UPDATE SET json = excluded.json",
1401            params![&test.id, &test.file_id.0, serde_json::to_string(test)?],
1402        )
1403        .map_err(storage_err)?;
1404    }
1405    for import in rows.imports {
1406        tx.execute(
1407            "INSERT INTO imports(id, file_id, imported, json) VALUES(?1, ?2, ?3, ?4)",
1408            params![
1409                occurrence_id(
1410                    &import.file_id.0,
1411                    &import.imported,
1412                    import.range.as_ref().map(|range| range.start),
1413                    true
1414                ),
1415                &import.file_id.0,
1416                &import.imported,
1417                serde_json::to_string(import)?
1418            ],
1419        )
1420        .map_err(storage_err)?;
1421    }
1422    for occurrence in rows.occurrences {
1423        tx.execute(
1424            "INSERT INTO occurrences(id, symbol_id, file_id, is_definition, json) VALUES(?1, ?2, ?3, ?4, ?5)",
1425            params![
1426                occurrence_id(
1427                    &occurrence.file_id.0,
1428                    &occurrence.symbol_id.0,
1429                    occurrence.range.as_ref().map(|range| range.start),
1430                    occurrence.is_definition,
1431                ),
1432                &occurrence.symbol_id.0,
1433                &occurrence.file_id.0,
1434                if occurrence.is_definition { 1 } else { 0 },
1435                serde_json::to_string(occurrence)?
1436            ],
1437        )
1438        .map_err(storage_err)?;
1439    }
1440    for fact in rows.analysis_facts {
1441        tx.execute(
1442            "INSERT INTO analysis_facts(id, file_id, source_type, target, json) VALUES(?1, ?2, ?3, ?4, ?5)",
1443            params![
1444                &fact.id,
1445                &fact.file_id.0,
1446                source_type_name(&fact.source_type),
1447                &fact.target,
1448                serde_json::to_string(fact)?
1449            ],
1450        )
1451        .map_err(storage_err)?;
1452    }
1453    Ok(())
1454}
1455
1456fn insert_graph_rows(tx: &Transaction<'_>, nodes: &[GraphNode], edges: &[GraphEdge]) -> Result<()> {
1457    for node in nodes {
1458        let evidence_available = node.file_id.is_some() || node.symbol_id.is_some();
1459        tx.execute(
1460            "INSERT INTO graph_nodes(id, label, node_type, file_id, symbol_id, evidence_available, freshness, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
1461            params![
1462                &node.id.0,
1463                &node.label,
1464                format!("{:?}", node.node_type),
1465                node.file_id.as_ref().map(|f| &f.0),
1466                node.symbol_id.as_ref().map(|s| &s.0),
1467                evidence_available,
1468                0_i64,
1469                serde_json::to_string(node)?
1470            ],
1471        )
1472        .map_err(storage_err)?;
1473    }
1474    for edge in edges {
1475        let freshness = edge.evidence.indexed_at.timestamp();
1476        tx.execute(
1477            "INSERT INTO graph_edges(id, from_id, to_id, edge_type, confidence, source_type, source_file, evidence_available, freshness, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
1478            params![
1479                &edge.id.0,
1480                &edge.from.0,
1481                &edge.to.0,
1482                format!("{:?}", edge.edge_type),
1483                format!("{:?}", edge.evidence.confidence),
1484                format!("{:?}", edge.evidence.source_type),
1485                &edge.evidence.source,
1486                true,
1487                freshness,
1488                serde_json::to_string(edge)?
1489            ],
1490        )
1491        .map_err(storage_err)?;
1492    }
1493    Ok(())
1494}
1495
1496fn clamp_limit(limit: usize) -> usize {
1497    if limit == 0 {
1498        DEFAULT_GRAPH_QUERY_LIMIT
1499    } else {
1500        limit.min(MAX_GRAPH_QUERY_LIMIT)
1501    }
1502}
1503
1504impl GraphStore for SqliteStore {
1505    fn replace_graph(&self, nodes: &[GraphNode], edges: &[GraphEdge]) -> Result<()> {
1506        let mut conn = self
1507            .connection
1508            .lock()
1509            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1510        let tx = conn.transaction().map_err(storage_err)?;
1511        tx.execute("DELETE FROM graph_edges", [])
1512            .map_err(storage_err)?;
1513        tx.execute("DELETE FROM graph_nodes", [])
1514            .map_err(storage_err)?;
1515        insert_graph_rows(&tx, nodes, edges)?;
1516        tx.commit().map_err(storage_err)?;
1517        Ok(())
1518    }
1519
1520    fn node_type_stats(
1521        &self,
1522    ) -> Result<std::collections::HashMap<String, open_kioku_storage::TypeStats>> {
1523        let conn = self
1524            .connection
1525            .lock()
1526            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1527        let mut stmt = conn
1528            .prepare("SELECT node_type, COUNT(*), MAX(evidence_available), MAX(freshness) FROM graph_nodes GROUP BY node_type")
1529            .map_err(storage_err)?;
1530        let mut rows = stmt.query([]).map_err(storage_err)?;
1531        let mut map = std::collections::HashMap::new();
1532        while let Some(row) = rows.next().map_err(storage_err)? {
1533            let t: String = row.get(0).map_err(storage_err)?;
1534            let c: i64 = row.get(1).map_err(storage_err)?;
1535            let ev: bool = row.get(2).unwrap_or(false);
1536            let fr: Option<i64> = row.get(3).unwrap_or(None);
1537            map.insert(
1538                t,
1539                open_kioku_storage::TypeStats {
1540                    count: c as usize,
1541                    evidence_available: ev,
1542                    freshness: fr.map(|v| v as u64),
1543                },
1544            );
1545        }
1546        Ok(map)
1547    }
1548
1549    fn edge_type_stats(
1550        &self,
1551    ) -> Result<std::collections::HashMap<String, open_kioku_storage::TypeStats>> {
1552        let conn = self
1553            .connection
1554            .lock()
1555            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1556        let mut stmt = conn
1557            .prepare("SELECT edge_type, COUNT(*), MAX(evidence_available), MAX(freshness) FROM graph_edges GROUP BY edge_type")
1558            .map_err(storage_err)?;
1559        let mut rows = stmt.query([]).map_err(storage_err)?;
1560        let mut map = std::collections::HashMap::new();
1561        while let Some(row) = rows.next().map_err(storage_err)? {
1562            let t: String = row.get(0).map_err(storage_err)?;
1563            let c: i64 = row.get(1).map_err(storage_err)?;
1564            let ev: bool = row.get(2).unwrap_or(false);
1565            let fr: Option<i64> = row.get(3).unwrap_or(None);
1566            map.insert(
1567                t,
1568                open_kioku_storage::TypeStats {
1569                    count: c as usize,
1570                    evidence_available: ev,
1571                    freshness: fr.map(|v| v as u64),
1572                },
1573            );
1574        }
1575        Ok(map)
1576    }
1577
1578    fn node_by_id(&self, id: &str) -> Result<Option<GraphNode>> {
1579        let conn = self
1580            .connection
1581            .lock()
1582            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1583        graph_node_by_id(&conn, id)
1584    }
1585
1586    fn neighbors(&self, node: &str, limit: usize) -> Result<(Vec<GraphNode>, Vec<GraphEdge>)> {
1587        let conn = self
1588            .connection
1589            .lock()
1590            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1591        let mut stmt = conn
1592            .prepare("SELECT json FROM graph_edges WHERE from_id = ?1 OR to_id = ?1 LIMIT ?2")
1593            .map_err(storage_err)?;
1594        let rows = stmt
1595            .query_map(params![node, limit as i64], |row| row.get::<_, String>(0))
1596            .map_err(storage_err)?;
1597        let edges: Vec<GraphEdge> = collect_json(rows)?;
1598        let mut ids = edges
1599            .iter()
1600            .flat_map(|edge| [edge.from.0.clone(), edge.to.0.clone()])
1601            .collect::<Vec<_>>();
1602        ids.sort();
1603        ids.dedup();
1604        let mut nodes = Vec::new();
1605        for id in ids {
1606            if let Some(node) = graph_node_by_id(&conn, &id)? {
1607                nodes.push(node);
1608            }
1609        }
1610        Ok((nodes, edges))
1611    }
1612
1613    fn shortest_path(&self, from: &str, to: &str, max_depth: usize) -> Result<Vec<GraphEdge>> {
1614        use std::collections::{HashSet, VecDeque};
1615
1616        let conn = self
1617            .connection
1618            .lock()
1619            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1620
1621        // Prepare the statement once outside the BFS loop to avoid
1622        // O(N) statement recompilation on large graphs.
1623        let mut edge_stmt = conn
1624            .prepare("SELECT json FROM graph_edges WHERE from_id = ?1")
1625            .map_err(storage_err)?;
1626
1627        let mut queue = VecDeque::from([(from.to_string(), Vec::<GraphEdge>::new())]);
1628        let mut seen = HashSet::new();
1629        while let Some((node, path)) = queue.pop_front() {
1630            if node == to {
1631                return Ok(path);
1632            }
1633            if path.len() >= max_depth || !seen.insert(node.clone()) {
1634                continue;
1635            }
1636            let rows = edge_stmt
1637                .query_map(params![&node], |row| row.get::<_, String>(0))
1638                .map_err(storage_err)?;
1639            let edges: Vec<GraphEdge> = collect_json(rows)?;
1640            for edge in edges {
1641                let mut next_path = path.clone();
1642                next_path.push(edge.clone());
1643                queue.push_back((edge.to.0.clone(), next_path));
1644            }
1645        }
1646        Ok(Vec::new())
1647    }
1648    fn nodes_by_type(
1649        &self,
1650        node_type: GraphNodeType,
1651        limit: usize,
1652        offset: usize,
1653    ) -> Result<Vec<GraphNode>> {
1654        let conn = self
1655            .connection
1656            .lock()
1657            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1658        let limit = clamp_limit(limit) as i64;
1659        let offset = offset as i64;
1660        let type_str = format!("{:?}", node_type);
1661        let mut stmt = conn
1662            .prepare(
1663                "SELECT json FROM graph_nodes WHERE node_type = ?1 ORDER BY id LIMIT ?2 OFFSET ?3",
1664            )
1665            .map_err(storage_err)?;
1666        let rows = stmt
1667            .query_map(params![type_str, limit, offset], |row| {
1668                row.get::<_, String>(0)
1669            })
1670            .map_err(storage_err)?;
1671        collect_json(rows)
1672    }
1673
1674    fn all_graph_nodes(&self) -> Result<Vec<GraphNode>> {
1675        let conn = self
1676            .connection
1677            .lock()
1678            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1679        let mut stmt = conn
1680            .prepare("SELECT json FROM graph_nodes ORDER BY id")
1681            .map_err(storage_err)?;
1682        let rows = stmt
1683            .query_map([], |row| row.get::<_, String>(0))
1684            .map_err(storage_err)?;
1685        collect_json(rows)
1686    }
1687
1688    fn edges_by_type(
1689        &self,
1690        edge_type: GraphEdgeType,
1691        limit: usize,
1692        offset: usize,
1693    ) -> Result<Vec<GraphEdge>> {
1694        let conn = self
1695            .connection
1696            .lock()
1697            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1698        let limit = clamp_limit(limit) as i64;
1699        let offset = offset as i64;
1700        let type_str = format!("{:?}", edge_type);
1701        let mut stmt = conn
1702            .prepare(
1703                "SELECT json FROM graph_edges WHERE edge_type = ?1 ORDER BY id LIMIT ?2 OFFSET ?3",
1704            )
1705            .map_err(storage_err)?;
1706        let rows = stmt
1707            .query_map(params![type_str, limit, offset], |row| {
1708                row.get::<_, String>(0)
1709            })
1710            .map_err(storage_err)?;
1711        collect_json(rows)
1712    }
1713
1714    fn graph_counts(&self) -> Result<GraphCounts> {
1715        let conn = self
1716            .connection
1717            .lock()
1718            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1719        let nodes: usize = conn
1720            .query_row("SELECT COUNT(*) FROM graph_nodes", [], |row| row.get(0))
1721            .map_err(storage_err)?;
1722        let edges: usize = conn
1723            .query_row("SELECT COUNT(*) FROM graph_edges", [], |row| row.get(0))
1724            .map_err(storage_err)?;
1725        Ok(GraphCounts { nodes, edges })
1726    }
1727
1728    fn graph_schema_counts(&self) -> Result<GraphSchemaCounts> {
1729        let conn = self
1730            .connection
1731            .lock()
1732            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1733
1734        let mut node_types = std::collections::BTreeMap::new();
1735        let mut stmt = conn
1736            .prepare("SELECT node_type, COUNT(*) FROM graph_nodes GROUP BY node_type")
1737            .map_err(storage_err)?;
1738        let mut rows = stmt.query([]).map_err(storage_err)?;
1739        while let Some(row) = rows.next().map_err(storage_err)? {
1740            let ntype: String = row.get(0).map_err(storage_err)?;
1741            let count: usize = row.get(1).map_err(storage_err)?;
1742            if !ntype.is_empty() {
1743                node_types.insert(ntype, count);
1744            }
1745        }
1746
1747        let mut edge_types = std::collections::BTreeMap::new();
1748        let mut stmt = conn
1749            .prepare("SELECT edge_type, COUNT(*) FROM graph_edges GROUP BY edge_type")
1750            .map_err(storage_err)?;
1751        let mut rows = stmt.query([]).map_err(storage_err)?;
1752        while let Some(row) = rows.next().map_err(storage_err)? {
1753            let etype: String = row.get(0).map_err(storage_err)?;
1754            let count: usize = row.get(1).map_err(storage_err)?;
1755            if !etype.is_empty() {
1756                edge_types.insert(etype, count);
1757            }
1758        }
1759
1760        Ok(GraphSchemaCounts {
1761            node_types,
1762            edge_types,
1763        })
1764    }
1765
1766    fn graph_edges_between(&self, from: &str, to: &str, limit: usize) -> Result<Vec<GraphEdge>> {
1767        let conn = self
1768            .connection
1769            .lock()
1770            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1771        let limit = clamp_limit(limit) as i64;
1772        let mut stmt = conn
1773            .prepare("SELECT json FROM graph_edges WHERE from_id = ?1 AND to_id = ?2 ORDER BY id LIMIT ?3")
1774            .map_err(storage_err)?;
1775        let rows = stmt
1776            .query_map(params![from, to, limit], |row| row.get::<_, String>(0))
1777            .map_err(storage_err)?;
1778        collect_json(rows)
1779    }
1780}
1781
1782fn is_duplicate_column(err: &rusqlite::Error) -> bool {
1783    if let rusqlite::Error::SqliteFailure(_, Some(msg)) = err {
1784        msg.contains("duplicate column name")
1785    } else {
1786        false
1787    }
1788}
1789
1790fn add_column_if_not_exists(conn: &mut Connection, stmt: &str) -> Result<()> {
1791    match conn.execute(stmt, []) {
1792        Ok(_) => Ok(()),
1793        Err(err) if is_duplicate_column(&err) => Ok(()),
1794        Err(err) => Err(storage_err(err)),
1795    }
1796}
1797
1798fn migrate_graph_schema(conn: &mut Connection) -> Result<()> {
1799    // Add columns to graph_nodes
1800    add_column_if_not_exists(
1801        conn,
1802        "ALTER TABLE graph_nodes ADD COLUMN node_type TEXT DEFAULT ''",
1803    )?;
1804    add_column_if_not_exists(
1805        conn,
1806        "ALTER TABLE graph_nodes ADD COLUMN file_id TEXT DEFAULT ''",
1807    )?;
1808    add_column_if_not_exists(
1809        conn,
1810        "ALTER TABLE graph_nodes ADD COLUMN symbol_id TEXT DEFAULT ''",
1811    )?;
1812    add_column_if_not_exists(
1813        conn,
1814        "ALTER TABLE graph_nodes ADD COLUMN evidence_available BOOLEAN DEFAULT 0",
1815    )?;
1816    add_column_if_not_exists(
1817        conn,
1818        "ALTER TABLE graph_nodes ADD COLUMN freshness INTEGER DEFAULT 0",
1819    )?;
1820
1821    // Add columns to graph_edges
1822    add_column_if_not_exists(
1823        conn,
1824        "ALTER TABLE graph_edges ADD COLUMN confidence TEXT DEFAULT ''",
1825    )?;
1826    add_column_if_not_exists(
1827        conn,
1828        "ALTER TABLE graph_edges ADD COLUMN source_type TEXT DEFAULT ''",
1829    )?;
1830    add_column_if_not_exists(
1831        conn,
1832        "ALTER TABLE graph_edges ADD COLUMN source_file TEXT DEFAULT ''",
1833    )?;
1834    add_column_if_not_exists(
1835        conn,
1836        "ALTER TABLE graph_edges ADD COLUMN evidence_available BOOLEAN DEFAULT 0",
1837    )?;
1838    add_column_if_not_exists(
1839        conn,
1840        "ALTER TABLE graph_edges ADD COLUMN freshness INTEGER DEFAULT 0",
1841    )?;
1842
1843    backfill_graph_query_columns(conn)?;
1844
1845    // Add indexes (these are idempotent via IF NOT EXISTS)
1846    conn.execute(
1847        "CREATE INDEX IF NOT EXISTS idx_graph_nodes_type ON graph_nodes(node_type)",
1848        [],
1849    )
1850    .map_err(storage_err)?;
1851    conn.execute(
1852        "CREATE INDEX IF NOT EXISTS idx_graph_nodes_file ON graph_nodes(file_id)",
1853        [],
1854    )
1855    .map_err(storage_err)?;
1856    conn.execute(
1857        "CREATE INDEX IF NOT EXISTS idx_graph_nodes_symbol ON graph_nodes(symbol_id)",
1858        [],
1859    )
1860    .map_err(storage_err)?;
1861    conn.execute(
1862        "CREATE INDEX IF NOT EXISTS idx_graph_edges_type ON graph_edges(edge_type)",
1863        [],
1864    )
1865    .map_err(storage_err)?;
1866    conn.execute(
1867        "CREATE INDEX IF NOT EXISTS idx_graph_edges_from_type ON graph_edges(from_id, edge_type)",
1868        [],
1869    )
1870    .map_err(storage_err)?;
1871    conn.execute(
1872        "CREATE INDEX IF NOT EXISTS idx_graph_edges_to_type ON graph_edges(to_id, edge_type)",
1873        [],
1874    )
1875    .map_err(storage_err)?;
1876    conn.execute(
1877        "CREATE INDEX IF NOT EXISTS idx_graph_edges_source_type ON graph_edges(source_type)",
1878        [],
1879    )
1880    .map_err(storage_err)?;
1881
1882    let version: i64 = conn
1883        .pragma_query_value(None, "user_version", |row| row.get(0))
1884        .map_err(storage_err)?;
1885    if version < SQLITE_GRAPH_SCHEMA_VERSION {
1886        conn.pragma_update(None, "user_version", SQLITE_GRAPH_SCHEMA_VERSION)
1887            .map_err(storage_err)?;
1888    }
1889
1890    Ok(())
1891}
1892
1893fn backfill_graph_query_columns(conn: &mut Connection) -> Result<()> {
1894    let node_rows = {
1895        let mut stmt = conn
1896            .prepare(
1897                "SELECT id, json FROM graph_nodes
1898                 WHERE COALESCE(node_type, '') = ''
1899                    OR COALESCE(file_id, '') = ''
1900                    OR COALESCE(symbol_id, '') = ''",
1901            )
1902            .map_err(storage_err)?;
1903        let rows = stmt
1904            .query_map([], |row| {
1905                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1906            })
1907            .map_err(storage_err)?;
1908        let mut rows_out = Vec::new();
1909        for row in rows {
1910            rows_out.push(row.map_err(storage_err)?);
1911        }
1912        rows_out
1913    };
1914    for (id, json) in node_rows {
1915        let Ok(node) = serde_json::from_str::<GraphNode>(&json) else {
1916            continue;
1917        };
1918        conn.execute(
1919            "UPDATE graph_nodes
1920             SET node_type = ?1,
1921                 file_id = ?2,
1922                 symbol_id = ?3,
1923                 evidence_available = ?4
1924             WHERE id = ?5",
1925            params![
1926                format!("{:?}", node.node_type),
1927                node.file_id.as_ref().map(|file_id| file_id.0.as_str()),
1928                node.symbol_id
1929                    .as_ref()
1930                    .map(|symbol_id| symbol_id.0.as_str()),
1931                node.file_id.is_some() || node.symbol_id.is_some(),
1932                id,
1933            ],
1934        )
1935        .map_err(storage_err)?;
1936    }
1937
1938    let edge_rows = {
1939        let mut stmt = conn
1940            .prepare(
1941                "SELECT id, json FROM graph_edges
1942                 WHERE COALESCE(edge_type, '') = ''
1943                    OR COALESCE(confidence, '') = ''
1944                    OR COALESCE(source_type, '') = ''
1945                    OR COALESCE(source_file, '') = ''",
1946            )
1947            .map_err(storage_err)?;
1948        let rows = stmt
1949            .query_map([], |row| {
1950                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1951            })
1952            .map_err(storage_err)?;
1953        let mut rows_out = Vec::new();
1954        for row in rows {
1955            rows_out.push(row.map_err(storage_err)?);
1956        }
1957        rows_out
1958    };
1959    for (id, json) in edge_rows {
1960        let Ok(edge) = serde_json::from_str::<GraphEdge>(&json) else {
1961            continue;
1962        };
1963        conn.execute(
1964            "UPDATE graph_edges
1965             SET from_id = ?1,
1966                 to_id = ?2,
1967                 edge_type = ?3,
1968                 confidence = ?4,
1969                 source_type = ?5,
1970                 source_file = ?6,
1971                 evidence_available = ?7,
1972                 freshness = ?8
1973             WHERE id = ?9",
1974            params![
1975                edge.from.0.as_str(),
1976                edge.to.0.as_str(),
1977                format!("{:?}", edge.edge_type),
1978                format!("{:?}", edge.evidence.confidence),
1979                format!("{:?}", edge.evidence.source_type),
1980                edge.evidence.source.as_str(),
1981                true,
1982                edge.evidence.indexed_at.timestamp(),
1983                id,
1984            ],
1985        )
1986        .map_err(storage_err)?;
1987    }
1988
1989    Ok(())
1990}
1991
1992fn migrate_history_schema(conn: &mut Connection) -> Result<()> {
1993    ensure_supported_sqlite_schema(conn)?;
1994    let version: i64 = conn
1995        .pragma_query_value(None, "user_version", |row| row.get(0))
1996        .map_err(storage_err)?;
1997    let tx = conn.transaction().map_err(storage_err)?;
1998    tx.execute_batch(HISTORY_SCHEMA_V1).map_err(storage_err)?;
1999    if version < SQLITE_HISTORY_SCHEMA_VERSION {
2000        tx.pragma_update(None, "user_version", SQLITE_HISTORY_SCHEMA_VERSION)
2001            .map_err(storage_err)?;
2002    }
2003    tx.commit().map_err(storage_err)?;
2004    Ok(())
2005}
2006
2007fn ensure_supported_sqlite_schema(conn: &Connection) -> Result<()> {
2008    let version: i64 = conn
2009        .pragma_query_value(None, "user_version", |row| row.get(0))
2010        .map_err(storage_err)?;
2011    if version > SQLITE_SUPPORTED_SCHEMA_VERSION {
2012        return Err(OkError::Storage(format!(
2013            "sqlite schema version {version} is newer than supported version {SQLITE_SUPPORTED_SCHEMA_VERSION}"
2014        )));
2015    }
2016    Ok(())
2017}
2018
2019fn validate_history_snapshot(snapshot: &HistorySnapshot) -> Result<()> {
2020    if snapshot.schema_version != HISTORY_SCHEMA_VERSION {
2021        return Err(OkError::Storage(format!(
2022            "unsupported history snapshot schema version {}; expected {}",
2023            snapshot.schema_version, HISTORY_SCHEMA_VERSION
2024        )));
2025    }
2026
2027    let mut commit_ids = BTreeSet::new();
2028    for commit in &snapshot.commits {
2029        validate_text("commit id", &commit.id.0)?;
2030        if !commit_ids.insert(commit.id.0.clone()) {
2031            return Err(OkError::Storage(format!(
2032                "duplicate history commit id `{}`",
2033                commit.id
2034            )));
2035        }
2036        validate_text("commit author name", &commit.author.name)?;
2037        if let Some(committer) = &commit.committer {
2038            validate_text("commit committer name", &committer.name)?;
2039        }
2040        let mut parent_ids = BTreeSet::new();
2041        for parent_id in &commit.parent_ids {
2042            validate_text("parent commit id", &parent_id.0)?;
2043            if !parent_ids.insert(parent_id.0.as_str()) {
2044                return Err(OkError::Storage(format!(
2045                    "commit `{}` contains duplicate parent `{parent_id}`",
2046                    commit.id
2047                )));
2048            }
2049        }
2050    }
2051
2052    let mut file_touch_ids = BTreeSet::new();
2053    for touch in &snapshot.file_touches {
2054        validate_history_record_id(&touch.id, "file touch", &mut file_touch_ids)?;
2055        validate_commit_reference(&touch.commit_id, &commit_ids, "file touch")?;
2056        history_path(&touch.path)?;
2057        if let Some(previous_path) = &touch.previous_path {
2058            history_path(previous_path)?;
2059        }
2060    }
2061
2062    let mut symbol_touch_ids = BTreeSet::new();
2063    for touch in &snapshot.symbol_touches {
2064        validate_history_record_id(&touch.id, "symbol touch", &mut symbol_touch_ids)?;
2065        validate_commit_reference(&touch.commit_id, &commit_ids, "symbol touch")?;
2066        validate_text("symbol qualified name", &touch.qualified_name)?;
2067        history_path(&touch.file_path)?;
2068    }
2069
2070    let mut cochange_ids = BTreeSet::new();
2071    let mut cochange_pairs = BTreeSet::new();
2072    for edge in &snapshot.cochange_edges {
2073        validate_history_record_id(&edge.id, "co-change edge", &mut cochange_ids)?;
2074        let path = history_path(&edge.path)?;
2075        let cochanged_path = history_path(&edge.cochanged_path)?;
2076        if path == cochanged_path {
2077            return Err(OkError::Storage(format!(
2078                "co-change edge `{}` must connect two different paths",
2079                edge.id
2080            )));
2081        }
2082        if !cochange_pairs.insert((path.clone(), cochanged_path.clone())) {
2083            return Err(OkError::Storage(format!(
2084                "duplicate co-change edge `{path}` -> `{cochanged_path}`"
2085            )));
2086        }
2087        if edge.commit_count == 0 {
2088            return Err(OkError::Storage(format!(
2089                "co-change edge `{}` must have a positive commit count",
2090                edge.id
2091            )));
2092        }
2093        if !edge.recency_weight.is_finite() || edge.recency_weight < 0.0 {
2094            return Err(OkError::Storage(format!(
2095                "co-change edge `{}` has invalid recency weight {}",
2096                edge.id, edge.recency_weight
2097            )));
2098        }
2099        let mut sample_commits = BTreeSet::new();
2100        for commit_id in &edge.sample_commits {
2101            validate_text("sample commit id", &commit_id.0)?;
2102            if !sample_commits.insert(commit_id.0.as_str()) {
2103                return Err(OkError::Storage(format!(
2104                    "co-change edge `{}` contains duplicate sample commit `{commit_id}`",
2105                    edge.id
2106                )));
2107            }
2108        }
2109    }
2110
2111    let mut reviewer_ids = BTreeSet::new();
2112    for evidence in &snapshot.reviewer_evidence {
2113        validate_history_record_id(&evidence.id, "review event", &mut reviewer_ids)?;
2114        validate_text("reviewer name", &evidence.reviewer.name)?;
2115        validate_text("review evidence source", &evidence.source)?;
2116        if let Some(commit_id) = &evidence.commit_id {
2117            validate_text("review commit id", &commit_id.0)?;
2118        }
2119        if let Some(path) = &evidence.path {
2120            history_path(path)?;
2121        }
2122    }
2123
2124    Ok(())
2125}
2126
2127fn validate_history_record_id(
2128    id: &HistoryRecordId,
2129    kind: &str,
2130    ids: &mut BTreeSet<String>,
2131) -> Result<()> {
2132    validate_text(&format!("{kind} id"), &id.0)?;
2133    if !ids.insert(id.0.clone()) {
2134        return Err(OkError::Storage(format!("duplicate {kind} id `{id}`")));
2135    }
2136    Ok(())
2137}
2138
2139fn validate_commit_reference(
2140    commit_id: &GitCommitId,
2141    commit_ids: &BTreeSet<String>,
2142    kind: &str,
2143) -> Result<()> {
2144    validate_text("commit id", &commit_id.0)?;
2145    if !commit_ids.contains(&commit_id.0) {
2146        return Err(OkError::Storage(format!(
2147            "{kind} references missing commit `{commit_id}`"
2148        )));
2149    }
2150    Ok(())
2151}
2152
2153fn validate_text(field: &str, value: &str) -> Result<()> {
2154    if value.trim().is_empty() {
2155        return Err(OkError::Storage(format!("{field} must not be empty")));
2156    }
2157    Ok(())
2158}
2159
2160fn history_path(path: &Path) -> Result<String> {
2161    if path.as_os_str().is_empty()
2162        || path.is_absolute()
2163        || path
2164            .components()
2165            .any(|component| !matches!(component, std::path::Component::Normal(_)))
2166    {
2167        return Err(OkError::Storage(format!(
2168            "history path must be a normalized repository-relative path: {}",
2169            path.display()
2170        )));
2171    }
2172    let value = path.to_str().ok_or_else(|| {
2173        OkError::Storage(format!(
2174            "history path must be valid UTF-8: {}",
2175            path.display()
2176        ))
2177    })?;
2178    if value.contains('\\') {
2179        return Err(OkError::Storage(format!(
2180            "history path must use `/` separators: {}",
2181            path.display()
2182        )));
2183    }
2184    Ok(value.to_string())
2185}
2186
2187fn usize_to_i64(value: usize, field: &str) -> Result<i64> {
2188    i64::try_from(value)
2189        .map_err(|_| OkError::Storage(format!("{field} exceeds SQLite integer range")))
2190}
2191
/// Returns `limit + 1` as an `i64`, saturating at `i64::MAX`.
fn history_query_limit(limit: usize) -> i64 {
    let with_sentinel = limit.saturating_add(1);
    i64::try_from(with_sentinel).unwrap_or(i64::MAX)
}
2195
2196fn collect_limited_json<T, F>(
2197    rows: rusqlite::MappedRows<'_, F>,
2198    limit: usize,
2199) -> Result<(Vec<T>, bool)>
2200where
2201    F: FnMut(&rusqlite::Row<'_>) -> rusqlite::Result<String>,
2202    T: serde::de::DeserializeOwned,
2203{
2204    let mut values = collect_json(rows)?;
2205    let truncated = values.len() > limit;
2206    values.truncate(limit);
2207    Ok((values, truncated))
2208}
2209
2210fn collect_json<T, F>(rows: rusqlite::MappedRows<'_, F>) -> Result<Vec<T>>
2211where
2212    F: FnMut(&rusqlite::Row<'_>) -> rusqlite::Result<String>,
2213    T: serde::de::DeserializeOwned,
2214{
2215    let mut out = Vec::new();
2216    for row in rows {
2217        let raw = row.map_err(storage_err)?;
2218        out.push(serde_json::from_str(&raw)?);
2219    }
2220    Ok(out)
2221}
2222
2223fn graph_node_by_id(conn: &Connection, id: &str) -> Result<Option<GraphNode>> {
2224    let raw: Option<String> = conn
2225        .query_row(
2226            "SELECT json FROM graph_nodes WHERE id = ?1",
2227            params![id],
2228            |row| row.get(0),
2229        )
2230        .optional()
2231        .map_err(storage_err)?;
2232    raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
2233        .transpose()
2234}
2235
2236fn storage_err(err: rusqlite::Error) -> OkError {
2237    OkError::Storage(err.to_string())
2238}
2239
2240fn occurrence_id(file_id: &str, value: &str, line: Option<u32>, flag: bool) -> String {
2241    use sha2::{Digest, Sha256};
2242    let mut hasher = Sha256::new();
2243    hasher.update(file_id.as_bytes());
2244    hasher.update(b":");
2245    hasher.update(value.as_bytes());
2246    hasher.update(b":");
2247    hasher.update(line.unwrap_or_default().to_string().as_bytes());
2248    hasher.update(b":");
2249    hasher.update(if flag { b"1" } else { b"0" });
2250    format!("{:x}", hasher.finalize())
2251}
2252
2253fn source_type_name(source_type: &EvidenceSourceType) -> &'static str {
2254    match source_type {
2255        EvidenceSourceType::TreeSitter => "tree_sitter",
2256        EvidenceSourceType::Scip => "scip",
2257        EvidenceSourceType::Lsp => "lsp",
2258        EvidenceSourceType::Regex => "regex",
2259        EvidenceSourceType::Lexical => "lexical",
2260        EvidenceSourceType::Semantic => "semantic",
2261        EvidenceSourceType::Runtime => "runtime",
2262        EvidenceSourceType::GitHistory => "git_history",
2263        EvidenceSourceType::StaticAnalysis => "static_analysis",
2264        EvidenceSourceType::ExternalIntegration => "external_integration",
2265        EvidenceSourceType::Heuristic => "heuristic",
2266    }
2267}
2268
2269#[cfg(test)]
2270mod tests {
2271    use super::{SqliteStore, SQLITE_GRAPH_SCHEMA_VERSION};
2272    use chrono::{TimeZone, Utc};
2273    use open_kioku_core::{
2274        AnalysisFact, CodeChunk, Confidence, EdgeId, Evidence, EvidenceId, EvidenceSourceType,
2275        File, FileId, GitChangeKind, GitCochangeEdge, GitCommitId, GitCommitRecord, GitFileTouch,
2276        GitSymbolTouch, GraphEdge, GraphEdgeType, GraphNode, GraphNodeType, HistoryRecordId,
2277        HistorySnapshot, IndexManifest, IndexQuality, Language, LineRange, NodeId, Owner,
2278        Repository, RepositoryId, ReviewerEvidence, ReviewerRole, Symbol, SymbolId, SymbolKind,
2279        SymbolOccurrence, HISTORY_SCHEMA_VERSION,
2280    };
2281    use open_kioku_storage::{
2282        GraphStore, HistoryStore, IndexData, MetadataStore, PartialIndexUpdate,
2283    };
2284    use rusqlite::{params, Connection};
2285    use std::collections::BTreeMap;
2286
2287    fn make_store() -> SqliteStore {
2288        SqliteStore::open(":memory:").expect("in-memory store")
2289    }
2290
2291    fn make_file(id: &str, path: &str) -> File {
2292        File {
2293            id: FileId::new(id),
2294            repository_id: RepositoryId::new("repo"),
2295            path: path.into(),
2296            language: Language::Rust,
2297            size_bytes: 100,
2298            content_hash: format!("hash-{id}"),
2299            is_generated: false,
2300            is_vendor: false,
2301        }
2302    }
2303
2304    fn make_symbol(id: &str, name: &str, file_id: &str) -> Symbol {
2305        Symbol {
2306            id: SymbolId::new(id),
2307            name: name.into(),
2308            qualified_name: format!("module::{name}"),
2309            kind: SymbolKind::Function,
2310            file_id: FileId::new(file_id),
2311            range: Some(LineRange::single(1)),
2312            language: Language::Rust,
2313            confidence: Confidence::High,
2314            provenance: EvidenceSourceType::TreeSitter,
2315        }
2316    }
2317
2318    fn evidence() -> Evidence {
2319        Evidence {
2320            id: EvidenceId::new("ev-1"),
2321            source: "test".into(),
2322            source_type: EvidenceSourceType::Lexical,
2323            file_range: None,
2324            symbol_id: None,
2325            confidence: Confidence::Medium,
2326            message: "test evidence".into(),
2327            indexed_at: Utc::now(),
2328            ..Default::default()
2329        }
2330    }
2331
2332    fn make_manifest() -> IndexManifest {
2333        IndexManifest {
2334            repository: Repository {
2335                id: RepositoryId::new("repo"),
2336                name: "repo".into(),
2337                root: std::path::PathBuf::from("."),
2338                branch: None,
2339                commit: None,
2340                indexed_at: None,
2341            },
2342            file_count: 2,
2343            symbol_count: 2,
2344            chunk_count: 0,
2345            indexed_at: Utc::now(),
2346            schema_version: 1,
2347            index_mode: Default::default(),
2348            phase_reports: Vec::new(),
2349            quality: IndexQuality::default(),
2350        }
2351    }
2352
2353    fn history_snapshot() -> HistorySnapshot {
2354        let older_at = Utc.with_ymd_and_hms(2026, 5, 1, 12, 0, 0).unwrap();
2355        let newer_at = Utc.with_ymd_and_hms(2026, 6, 1, 12, 0, 0).unwrap();
2356        let older_id = GitCommitId::new("older");
2357        let newer_id = GitCommitId::new("newer");
2358        HistorySnapshot {
2359            schema_version: HISTORY_SCHEMA_VERSION,
2360            commits: vec![
2361                GitCommitRecord {
2362                    id: older_id.clone(),
2363                    parent_ids: Vec::new(),
2364                    author: Owner {
2365                        name: "Older Author".into(),
2366                        email: Some("older@example.com".into()),
2367                    },
2368                    committer: None,
2369                    authored_at: older_at,
2370                    committed_at: older_at,
2371                    summary: "Introduce library".into(),
2372                    message: "Introduce library".into(),
2373                    file_count: 2,
2374                },
2375                GitCommitRecord {
2376                    id: newer_id.clone(),
2377                    parent_ids: vec![older_id.clone()],
2378                    author: Owner {
2379                        name: "Newer Author".into(),
2380                        email: Some("newer@example.com".into()),
2381                    },
2382                    committer: None,
2383                    authored_at: newer_at,
2384                    committed_at: newer_at,
2385                    summary: "Refine library".into(),
2386                    message: "Refine library and tests".into(),
2387                    file_count: 3,
2388                },
2389            ],
2390            file_touches: vec![
2391                GitFileTouch {
2392                    id: HistoryRecordId::new("file-touch-older"),
2393                    commit_id: older_id.clone(),
2394                    path: "src/lib.rs".into(),
2395                    previous_path: None,
2396                    change_kind: GitChangeKind::Added,
2397                    additions: Some(20),
2398                    deletions: Some(0),
2399                    touched_at: older_at,
2400                },
2401                GitFileTouch {
2402                    id: HistoryRecordId::new("file-touch-newer"),
2403                    commit_id: newer_id.clone(),
2404                    path: "src/lib.rs".into(),
2405                    previous_path: None,
2406                    change_kind: GitChangeKind::Modified,
2407                    additions: Some(5),
2408                    deletions: Some(2),
2409                    touched_at: newer_at,
2410                },
2411            ],
2412            symbol_touches: vec![GitSymbolTouch {
2413                id: HistoryRecordId::new("symbol-touch-newer"),
2414                commit_id: newer_id.clone(),
2415                symbol_id: Some(SymbolId::new("symbol-1")),
2416                qualified_name: "crate::history_for_file".into(),
2417                file_path: "src/lib.rs".into(),
2418                change_kind: GitChangeKind::Modified,
2419                line_ranges: vec![LineRange { start: 4, end: 8 }],
2420                confidence: Confidence::Medium,
2421                uncertainty: vec!["historical coordinates may have shifted".into()],
2422                touched_at: newer_at,
2423            }],
2424            cochange_edges: vec![
2425                GitCochangeEdge {
2426                    id: HistoryRecordId::new("cochange-test"),
2427                    path: "src/lib.rs".into(),
2428                    cochanged_path: "tests/lib_test.rs".into(),
2429                    commit_count: 2,
2430                    recency_weight: 1.8,
2431                    last_changed_at: Some(newer_at),
2432                    sample_commits: vec![newer_id.clone(), older_id.clone()],
2433                    test_corun: true,
2434                },
2435                GitCochangeEdge {
2436                    id: HistoryRecordId::new("cochange-docs"),
2437                    path: "src/lib.rs".into(),
2438                    cochanged_path: "docs/library.md".into(),
2439                    commit_count: 1,
2440                    recency_weight: 0.5,
2441                    last_changed_at: Some(older_at),
2442                    sample_commits: vec![older_id],
2443                    test_corun: false,
2444                },
2445            ],
2446            reviewer_evidence: vec![ReviewerEvidence {
2447                id: HistoryRecordId::new("review-newer"),
2448                commit_id: Some(newer_id),
2449                path: None,
2450                reviewer: Owner {
2451                    name: "Reviewer".into(),
2452                    email: Some("reviewer@example.com".into()),
2453                },
2454                role: ReviewerRole::Reviewer,
2455                observed_at: newer_at,
2456                source: "git-trailer:reviewed-by".into(),
2457                confidence: Confidence::High,
2458            }],
2459        }
2460    }
2461
2462    #[test]
2463    fn history_migration_upgrades_legacy_database_idempotently() {
2464        let dir = tempfile::tempdir().unwrap();
2465        let path = dir.path().join("index.sqlite");
2466        let legacy = Connection::open(&path).unwrap();
2467        legacy
2468            .execute_batch(
2469                r#"
2470                PRAGMA user_version = 0;
2471                CREATE TABLE analysis_facts (
2472                  id TEXT PRIMARY KEY,
2473                  file_id TEXT NOT NULL,
2474                  source_type TEXT NOT NULL,
2475                  target TEXT NOT NULL,
2476                  json TEXT NOT NULL
2477                );
2478                INSERT INTO analysis_facts(id, file_id, source_type, target, json)
2479                VALUES('legacy-git', 'f1', 'git_history', 'tests/lib_test.rs', '{}');
2480                "#,
2481            )
2482            .unwrap();
2483        drop(legacy);
2484
2485        let store = SqliteStore::open(&path).unwrap();
2486        store.initialize().unwrap();
2487
2488        let conn = store.connection.lock().unwrap();
2489        let version: i64 = conn
2490            .pragma_query_value(None, "user_version", |row| row.get(0))
2491            .unwrap();
2492        assert_eq!(version, SQLITE_GRAPH_SCHEMA_VERSION);
2493        let history_table_count: i64 = conn
2494            .query_row(
2495                "SELECT COUNT(*) FROM sqlite_master
2496                 WHERE type = 'table'
2497                   AND name IN (
2498                     'git_commits',
2499                     'git_file_touches',
2500                     'git_symbol_touches',
2501                     'git_cochange_edges',
2502                     'git_review_events'
2503                   )",
2504                [],
2505                |row| row.get(0),
2506            )
2507            .unwrap();
2508        assert_eq!(history_table_count, 5);
2509        let legacy_fact_count: i64 = conn
2510            .query_row("SELECT COUNT(*) FROM analysis_facts", [], |row| row.get(0))
2511            .unwrap();
2512        assert_eq!(legacy_fact_count, 1);
2513    }
2514
2515    #[test]
2516    fn newer_sqlite_schema_is_rejected_without_mutation() {
2517        let dir = tempfile::tempdir().unwrap();
2518        let path = dir.path().join("future.sqlite");
2519        let future = Connection::open(&path).unwrap();
2520        future
2521            .execute_batch(
2522                r#"
2523                PRAGMA user_version = 3;
2524                CREATE TABLE future_history_marker (id INTEGER PRIMARY KEY);
2525                "#,
2526            )
2527            .unwrap();
2528        drop(future);
2529
2530        let error = match SqliteStore::open(&path) {
2531            Ok(_) => panic!("newer schema should be rejected"),
2532            Err(error) => error.to_string(),
2533        };
2534        assert!(error.contains("newer than supported version 2"));
2535
2536        let conn = Connection::open(&path).unwrap();
2537        let current_table_count: i64 = conn
2538            .query_row(
2539                "SELECT COUNT(*) FROM sqlite_master WHERE type = 'table' AND name = 'manifests'",
2540                [],
2541                |row| row.get(0),
2542            )
2543            .unwrap();
2544        assert_eq!(current_table_count, 0);
2545        let future_marker_count: i64 = conn
2546            .query_row(
2547                "SELECT COUNT(*) FROM sqlite_master WHERE type = 'table' AND name = 'future_history_marker'",
2548                [],
2549                |row| row.get(0),
2550            )
2551            .unwrap();
2552        assert_eq!(future_marker_count, 1);
2553    }
2554
2555    #[test]
2556    fn history_snapshot_queries_return_typed_evidence() {
2557        let store = make_store();
2558        store.put_history_snapshot(&history_snapshot()).unwrap();
2559
2560        let recent = store.recent_commits(10).unwrap();
2561        assert_eq!(recent.len(), 2);
2562        assert_eq!(recent[0].id.0, "newer");
2563
2564        let neighbors = store
2565            .cochange_neighbors(std::path::Path::new("src/lib.rs"), 10)
2566            .unwrap();
2567        assert_eq!(neighbors.len(), 2);
2568        assert_eq!(
2569            neighbors[0].cochanged_path,
2570            std::path::Path::new("tests/lib_test.rs")
2571        );
2572
2573        let summary = store
2574            .history_for_file(std::path::Path::new("src/lib.rs"), 10)
2575            .unwrap();
2576        assert_eq!(summary.recent_commits.len(), 2);
2577        assert_eq!(summary.file_touches.len(), 2);
2578        assert_eq!(summary.symbol_touches.len(), 1);
2579        assert_eq!(summary.cochange_neighbors.len(), 2);
2580        assert_eq!(summary.reviewer_evidence.len(), 1);
2581        assert!(!summary.truncated);
2582        assert!(summary.uncertainty.is_empty());
2583
2584        let truncated = store
2585            .history_for_file(std::path::Path::new("src/lib.rs"), 1)
2586            .unwrap();
2587        assert!(truncated.truncated);
2588        assert!(truncated
2589            .uncertainty
2590            .iter()
2591            .any(|note| note.contains("truncated")));
2592    }
2593
2594    #[test]
2595    fn provenance_queries_return_first_last_and_explicit_symbol_uncertainty() {
2596        let store = make_store();
2597        let file = make_file("file-1", "src/lib.rs");
2598        let symbol = make_symbol("symbol-1", "history_for_file", "file-1");
2599        let mut unmapped_symbol = make_symbol("symbol-2", "unmapped", "file-1");
2600        unmapped_symbol.range = None;
2601        let manifest = make_manifest();
2602        store
2603            .replace_index(IndexData {
2604                manifest: &manifest,
2605                files: std::slice::from_ref(&file),
2606                symbols: &[symbol.clone(), unmapped_symbol.clone()],
2607                chunks: &[],
2608                tests: &[],
2609                imports: &[],
2610                occurrences: &[],
2611                analysis_facts: &[],
2612            })
2613            .unwrap();
2614        store.put_history_snapshot(&history_snapshot()).unwrap();
2615
2616        let file_provenance = store
2617            .provenance_for_path(std::path::Path::new("src/lib.rs"), 10)
2618            .unwrap();
2619        assert_eq!(
2620            file_provenance
2621                .first_seen
2622                .as_ref()
2623                .map(|touch| touch.commit.id.0.as_str()),
2624            Some("older")
2625        );
2626        assert_eq!(
2627            file_provenance
2628                .last_touched
2629                .as_ref()
2630                .map(|touch| touch.commit.id.0.as_str()),
2631            Some("newer")
2632        );
2633        assert_eq!(file_provenance.recent_touches.len(), 2);
2634        assert_eq!(file_provenance.confidence, Confidence::Exact);
2635
2636        let symbol_provenance = store.provenance_for_symbol(&symbol.id, 10).unwrap();
2637        assert_eq!(symbol_provenance.recent_touches.len(), 1);
2638        assert_eq!(symbol_provenance.confidence, Confidence::Medium);
2639        assert_eq!(
2640            symbol_provenance.recent_touches[0].commit.author.name,
2641            "Newer Author"
2642        );
2643        assert_eq!(
2644            symbol_provenance.recent_touches[0].line_ranges,
2645            vec![LineRange { start: 4, end: 8 }]
2646        );
2647        assert!(symbol_provenance
2648            .uncertainty
2649            .iter()
2650            .any(|note| note.contains("earliest line-mapped touch")));
2651
2652        let unmapped = store
2653            .provenance_for_symbol(&unmapped_symbol.id, 10)
2654            .unwrap();
2655        assert!(unmapped.first_seen.is_none());
2656        assert!(unmapped.last_touched.is_none());
2657        assert!(unmapped.recent_touches.is_empty());
2658        assert_eq!(unmapped.confidence, Confidence::Low);
2659        assert!(unmapped
2660            .uncertainty
2661            .iter()
2662            .any(|note| note.contains("no persisted line-level commit mapping")));
2663        assert!(unmapped
2664            .uncertainty
2665            .iter()
2666            .any(|note| note.contains("has no line range")));
2667    }
2668
2669    #[test]
2670    fn path_provenance_follows_rename_aliases_in_both_directions() {
2671        let store = make_store();
2672        let mut snapshot = history_snapshot();
2673        snapshot.file_touches[0].path = "src/old.rs".into();
2674        snapshot.file_touches[1].previous_path = Some("src/old.rs".into());
2675        snapshot.file_touches[1].change_kind = GitChangeKind::Renamed;
2676        store.put_history_snapshot(&snapshot).unwrap();
2677
2678        let current = store
2679            .provenance_for_path(std::path::Path::new("src/lib.rs"), 10)
2680            .unwrap();
2681        let historical = store
2682            .provenance_for_path(std::path::Path::new("src/old.rs"), 10)
2683            .unwrap();
2684
2685        assert_eq!(current.recent_touches.len(), 2);
2686        assert_eq!(historical.recent_touches.len(), 2);
2687        assert_eq!(
2688            current
2689                .first_seen
2690                .as_ref()
2691                .map(|touch| touch.path.as_path()),
2692            Some(std::path::Path::new("src/old.rs"))
2693        );
2694    }
2695
2696    #[test]
2697    fn invalid_snapshot_does_not_replace_existing_history() {
2698        let store = make_store();
2699        let snapshot = history_snapshot();
2700        store.put_history_snapshot(&snapshot).unwrap();
2701
2702        let mut invalid = snapshot;
2703        invalid.file_touches[0].commit_id = GitCommitId::new("missing");
2704        let error = store
2705            .put_history_snapshot(&invalid)
2706            .unwrap_err()
2707            .to_string();
2708        assert!(error.contains("references missing commit `missing`"));
2709
2710        let recent = store.recent_commits(10).unwrap();
2711        assert_eq!(recent.len(), 2);
2712        assert_eq!(recent[0].id.0, "newer");
2713
2714        store
2715            .put_history_snapshot(&HistorySnapshot::empty())
2716            .unwrap();
2717        assert!(store.recent_commits(10).unwrap().is_empty());
2718    }
2719
2720    #[test]
2721    fn replace_index_and_list_files() {
2722        let store = make_store();
2723        let file1 = make_file("f1", "src/main.rs");
2724        let file2 = make_file("f2", "src/lib.rs");
2725        let sym1 = make_symbol("s1", "main_fn", "f1");
2726
2727        let manifest = make_manifest();
2728        let files = vec![file1.clone(), file2.clone()];
2729        let symbols = vec![sym1.clone()];
2730
2731        let data = IndexData {
2732            manifest: &manifest,
2733            files: &files,
2734            symbols: &symbols,
2735            occurrences: &[],
2736            chunks: &[],
2737            imports: &[],
2738            tests: &[],
2739            analysis_facts: &[],
2740        };
2741        store.replace_index(data).unwrap();
2742
2743        let files_list = store.list_files(100, 0).unwrap();
2744        assert_eq!(files_list.len(), 2);
2745
2746        let by_path = store
2747            .get_file_by_path(&std::path::PathBuf::from("src/main.rs"))
2748            .unwrap();
2749        assert!(by_path.is_some());
2750        assert_eq!(by_path.unwrap().id, file1.id);
2751    }
2752
2753    #[test]
2754    fn partial_replace_updates_changed_files_and_cleans_deleted_graph_edges() {
2755        let store = make_store();
2756        let manifest = make_manifest();
2757        let file1 = make_file("f1", "src/main.rs");
2758        let file2 = make_file("f2", "src/lib.rs");
2759        let sym1 = make_symbol("s1", "main_fn", "f1");
2760        let sym2 = make_symbol("s2", "lib_fn", "f2");
2761        let old_chunk = CodeChunk {
2762            id: "c1".into(),
2763            file_id: file1.id.clone(),
2764            range: LineRange { start: 1, end: 1 },
2765            language: Language::Rust,
2766            text: "fn main_fn() {}".into(),
2767            symbol_id: Some(sym1.id.clone()),
2768        };
2769        store
2770            .replace_index(IndexData {
2771                manifest: &manifest,
2772                files: &[file1.clone(), file2.clone()],
2773                symbols: &[sym1.clone(), sym2.clone()],
2774                chunks: std::slice::from_ref(&old_chunk),
2775                tests: &[],
2776                imports: &[],
2777                occurrences: &[SymbolOccurrence {
2778                    symbol_id: sym1.id.clone(),
2779                    file_id: file1.id.clone(),
2780                    range: Some(LineRange::single(1)),
2781                    is_definition: true,
2782                    confidence: Confidence::Exact,
2783                    provenance: EvidenceSourceType::StaticAnalysis,
2784                }],
2785                analysis_facts: &[],
2786            })
2787            .unwrap();
2788        let node1 = GraphNode {
2789            id: NodeId::new("symbol:s1"),
2790            node_type: GraphNodeType::Function,
2791            label: "main_fn".into(),
2792            file_id: Some(file1.id.clone()),
2793            symbol_id: Some(sym1.id.clone()),
2794            ..Default::default()
2795        };
2796        let node2 = GraphNode {
2797            id: NodeId::new("symbol:s2"),
2798            node_type: GraphNodeType::Function,
2799            label: "lib_fn".into(),
2800            file_id: Some(file2.id.clone()),
2801            symbol_id: Some(sym2.id.clone()),
2802            ..Default::default()
2803        };
2804        let edge = GraphEdge {
2805            id: EdgeId::new("edge:s1-s2"),
2806            from: node1.id.clone(),
2807            to: node2.id.clone(),
2808            edge_type: GraphEdgeType::References,
2809            evidence: evidence(),
2810            ..Default::default()
2811        };
2812        let node3 = GraphNode {
2813            id: NodeId::new("external:a"),
2814            node_type: GraphNodeType::Module,
2815            label: "external a".into(),
2816            ..Default::default()
2817        };
2818        let node4 = GraphNode {
2819            id: NodeId::new("external:b"),
2820            node_type: GraphNodeType::Module,
2821            label: "external b".into(),
2822            ..Default::default()
2823        };
2824        let mut source_evidence = evidence();
2825        source_evidence.source = "src/main.rs".into();
2826        let source_edge = GraphEdge {
2827            id: EdgeId::new("edge:source-file"),
2828            from: node3.id.clone(),
2829            to: node4.id.clone(),
2830            edge_type: GraphEdgeType::RelatedToTicket,
2831            evidence: source_evidence,
2832            ..Default::default()
2833        };
2834        store
2835            .replace_graph(
2836                &[node1, node2.clone(), node3.clone(), node4.clone()],
2837                &[edge.clone(), source_edge],
2838            )
2839            .unwrap();
2840
2841        let mut updated_file2 = file2.clone();
2842        updated_file2.content_hash = "new-hash".into();
2843        let updated_sym2 = make_symbol("s2b", "lib_fn_new", "f2");
2844        let updated_chunk = CodeChunk {
2845            id: "c2".into(),
2846            file_id: updated_file2.id.clone(),
2847            range: LineRange { start: 2, end: 2 },
2848            language: Language::Rust,
2849            text: "fn lib_fn_new() {}".into(),
2850            symbol_id: Some(updated_sym2.id.clone()),
2851        };
2852        let updated_node2 = GraphNode {
2853            id: NodeId::new("symbol:s2b"),
2854            node_type: GraphNodeType::Function,
2855            label: "lib_fn_new".into(),
2856            file_id: Some(updated_file2.id.clone()),
2857            symbol_id: Some(updated_sym2.id.clone()),
2858            ..Default::default()
2859        };
2860        store
2861            .replace_files_index(PartialIndexUpdate {
2862                manifest: &manifest,
2863                changed_files: std::slice::from_ref(&updated_file2),
2864                deleted_file_ids: std::slice::from_ref(&file1.id),
2865                symbols: std::slice::from_ref(&updated_sym2),
2866                chunks: std::slice::from_ref(&updated_chunk),
2867                tests: &[],
2868                imports: &[],
2869                occurrences: &[],
2870                analysis_facts: &[],
2871                graph_nodes: std::slice::from_ref(&updated_node2),
2872                graph_edges: &[],
2873            })
2874            .unwrap();
2875
2876        assert!(store
2877            .get_file_by_path(std::path::Path::new("src/main.rs"))
2878            .unwrap()
2879            .is_none());
2880        assert_eq!(
2881            store
2882                .get_file_by_path(std::path::Path::new("src/lib.rs"))
2883                .unwrap()
2884                .unwrap()
2885                .content_hash,
2886            "new-hash"
2887        );
2888        assert!(store.symbol_by_id(&sym1.id).unwrap().is_none());
2889        assert!(store.symbol_by_id(&updated_sym2.id).unwrap().is_some());
2890        assert!(store.chunks_for_file(&file1.id).unwrap().is_empty());
2891        assert_eq!(store.chunks_for_file(&file2.id).unwrap()[0].id, "c2");
2892        let edge_count: i64 = store
2893            .connection
2894            .lock()
2895            .unwrap()
2896            .query_row("SELECT COUNT(*) FROM graph_edges", [], |row| row.get(0))
2897            .unwrap();
2898        assert_eq!(edge_count, 0);
2899        assert!(store.node_by_id("symbol:s1").unwrap().is_none());
2900        assert!(store.node_by_id("symbol:s2b").unwrap().is_some());
2901    }
2902
2903    #[test]
2904    fn partial_replace_rolls_back_on_insert_failure() {
2905        let store = make_store();
2906        let manifest = make_manifest();
2907        let file = make_file("f1", "src/lib.rs");
2908        store
2909            .replace_index(IndexData {
2910                manifest: &manifest,
2911                files: std::slice::from_ref(&file),
2912                symbols: &[],
2913                chunks: &[],
2914                tests: &[],
2915                imports: &[],
2916                occurrences: &[],
2917                analysis_facts: &[],
2918            })
2919            .unwrap();
2920
2921        let duplicate_a = make_file("f2", "src/dup.rs");
2922        let mut duplicate_b = make_file("f3", "src/dup.rs");
2923        duplicate_b.content_hash = "other".into();
2924        let error = store
2925            .replace_files_index(PartialIndexUpdate {
2926                manifest: &manifest,
2927                changed_files: &[duplicate_a, duplicate_b],
2928                deleted_file_ids: std::slice::from_ref(&file.id),
2929                symbols: &[],
2930                chunks: &[],
2931                tests: &[],
2932                imports: &[],
2933                occurrences: &[],
2934                analysis_facts: &[],
2935                graph_nodes: &[],
2936                graph_edges: &[],
2937            })
2938            .unwrap_err()
2939            .to_string();
2940        assert!(error.contains("UNIQUE") || error.contains("constraint"));
2941        assert!(store
2942            .get_file_by_path(std::path::Path::new("src/lib.rs"))
2943            .unwrap()
2944            .is_some());
2945        assert!(store
2946            .get_file_by_path(std::path::Path::new("src/dup.rs"))
2947            .unwrap()
2948            .is_none());
2949    }
2950
2951    #[test]
2952    fn replace_index_persists_analysis_facts() {
2953        let store = make_store();
2954        let file = make_file("f1", "src/handler.rs");
2955        let manifest = make_manifest();
2956        let runtime_fact = AnalysisFact {
2957            id: "runtime-1".into(),
2958            file_id: file.id.clone(),
2959            symbol_id: None,
2960            target: "GET /api/orders".into(),
2961            target_kind: GraphNodeType::Endpoint,
2962            edge_type: GraphEdgeType::ExposesEndpoint,
2963            range: Some(LineRange::single(12)),
2964            confidence: Confidence::High,
2965            source: "open-kioku-runtime:.ok/runtime/spans.jsonl".into(),
2966            source_type: EvidenceSourceType::Runtime,
2967            message: "runtime endpoint observed in local trace artifact".into(),
2968        };
2969        let static_fact = AnalysisFact {
2970            id: "static-1".into(),
2971            file_id: file.id.clone(),
2972            symbol_id: None,
2973            target: "orders".into(),
2974            target_kind: GraphNodeType::DatabaseTable,
2975            edge_type: GraphEdgeType::ReadsTable,
2976            range: None,
2977            confidence: Confidence::Medium,
2978            source: "open-kioku-static".into(),
2979            source_type: EvidenceSourceType::StaticAnalysis,
2980            message: "static fact".into(),
2981        };
2982        let git_fact = AnalysisFact {
2983            id: "git-1".into(),
2984            file_id: file.id.clone(),
2985            symbol_id: None,
2986            target: "tests/handler_test.rs".into(),
2987            target_kind: GraphNodeType::Test,
2988            edge_type: GraphEdgeType::ChangedBy,
2989            range: None,
2990            confidence: Confidence::High,
2991            source: "git-history:abc123".into(),
2992            source_type: EvidenceSourceType::GitHistory,
2993            message: "git co-change observed in 1 commit(s), recency weight 1.00".into(),
2994        };
2995
2996        store
2997            .replace_index(IndexData {
2998                manifest: &manifest,
2999                files: &[file],
3000                symbols: &[],
3001                occurrences: &[],
3002                chunks: &[],
3003                imports: &[],
3004                tests: &[],
3005                analysis_facts: &[runtime_fact.clone(), static_fact, git_fact.clone()],
3006            })
3007            .unwrap();
3008
3009        let runtime = store
3010            .analysis_facts(Some(EvidenceSourceType::Runtime), 10)
3011            .unwrap();
3012        assert_eq!(runtime.len(), 1);
3013        assert_eq!(runtime[0].id, runtime_fact.id);
3014        assert_eq!(runtime[0].target, runtime_fact.target);
3015        let git = store
3016            .analysis_facts(Some(EvidenceSourceType::GitHistory), 10)
3017            .unwrap();
3018        assert_eq!(git.len(), 1);
3019        assert_eq!(git[0].id, git_fact.id);
3020        assert_eq!(git[0].target, git_fact.target);
3021        let all = store.analysis_facts(None, 10).unwrap();
3022        assert_eq!(all.len(), 3);
3023    }
3024
3025    #[test]
3026    fn replace_index_preserves_typed_and_legacy_history() {
3027        let store = make_store();
3028        store.put_history_snapshot(&history_snapshot()).unwrap();
3029
3030        let file = make_file("f1", "src/lib.rs");
3031        let manifest = make_manifest();
3032        let git_fact = AnalysisFact {
3033            id: "legacy-git-1".into(),
3034            file_id: file.id.clone(),
3035            symbol_id: None,
3036            target: "tests/lib_test.rs".into(),
3037            target_kind: GraphNodeType::Test,
3038            edge_type: GraphEdgeType::ChangedBy,
3039            range: None,
3040            confidence: Confidence::High,
3041            source: "git-history:newer".into(),
3042            source_type: EvidenceSourceType::GitHistory,
3043            message: "legacy co-change compatibility fact".into(),
3044        };
3045
3046        for _ in 0..2 {
3047            store
3048                .replace_index(IndexData {
3049                    manifest: &manifest,
3050                    files: std::slice::from_ref(&file),
3051                    symbols: &[],
3052                    occurrences: &[],
3053                    chunks: &[],
3054                    imports: &[],
3055                    tests: &[],
3056                    analysis_facts: std::slice::from_ref(&git_fact),
3057                })
3058                .unwrap();
3059        }
3060
3061        assert_eq!(store.recent_commits(10).unwrap().len(), 2);
3062        let summary = store
3063            .history_for_file(std::path::Path::new("src/lib.rs"), 10)
3064            .unwrap();
3065        assert_eq!(summary.file_touches.len(), 2);
3066        let legacy = store
3067            .analysis_facts(Some(EvidenceSourceType::GitHistory), 10)
3068            .unwrap();
3069        assert_eq!(legacy.len(), 1);
3070        assert_eq!(legacy[0].id, git_fact.id);
3071    }
3072
3073    #[test]
3074    fn list_symbols_with_filter() {
3075        let store = make_store();
3076        let file = make_file("f1", "src/lib.rs");
3077        let sym_a = make_symbol("s1", "alpha_handler", "f1");
3078        let sym_b = make_symbol("s2", "beta_worker", "f1");
3079        let manifest = make_manifest();
3080        let files = vec![file];
3081        let symbols = vec![sym_a, sym_b];
3082        let data = IndexData {
3083            manifest: &manifest,
3084            files: &files,
3085            symbols: &symbols,
3086            occurrences: &[],
3087            chunks: &[],
3088            imports: &[],
3089            tests: &[],
3090            analysis_facts: &[],
3091        };
3092        store.replace_index(data).unwrap();
3093
3094        let all = store.list_symbols(None, 100, 0).unwrap();
3095        assert_eq!(all.len(), 2);
3096
3097        let filtered = store.list_symbols(Some("alpha"), 10, 0).unwrap();
3098        assert_eq!(filtered.len(), 1);
3099        assert_eq!(filtered[0].name, "alpha_handler");
3100    }
3101
3102    #[test]
3103    fn replace_graph_and_neighbors() {
3104        let store = make_store();
3105        // First we need an index so that the graph tables exist.
3106        let file = make_file("f1", "src/lib.rs");
3107        let manifest = make_manifest();
3108        let files = vec![file];
3109        let data = IndexData {
3110            manifest: &manifest,
3111            files: &files,
3112            symbols: &[],
3113            occurrences: &[],
3114            chunks: &[],
3115            imports: &[],
3116            tests: &[],
3117            analysis_facts: &[],
3118        };
3119        store.replace_index(data).unwrap();
3120
3121        let node_a = GraphNode {
3122            id: NodeId::new("file:src/lib.rs"),
3123            node_type: GraphNodeType::File,
3124            label: "src/lib.rs".into(),
3125            file_id: Some(FileId::new("f1")),
3126            symbol_id: None,
3127            ..Default::default()
3128        };
3129        let node_b = GraphNode {
3130            id: NodeId::new("symbol:s1"),
3131            node_type: GraphNodeType::Function,
3132            label: "worker".into(),
3133            file_id: Some(FileId::new("f1")),
3134            symbol_id: Some(SymbolId::new("s1")),
3135            ..Default::default()
3136        };
3137        let edge = GraphEdge {
3138            id: EdgeId::new("e1"),
3139            from: node_a.id.clone(),
3140            to: node_b.id.clone(),
3141            edge_type: GraphEdgeType::Defines,
3142            evidence: evidence(),
3143            ..Default::default()
3144        };
3145
3146        store
3147            .replace_graph(
3148                &[node_a.clone(), node_b.clone()],
3149                std::slice::from_ref(&edge),
3150            )
3151            .unwrap();
3152
3153        let (nodes, edges) = store.neighbors("file:src/lib.rs", 10).unwrap();
3154        assert_eq!(edges.len(), 1);
3155        assert_eq!(edges[0].id.0, "e1");
3156        assert!(nodes.iter().any(|n| n.id == node_a.id));
3157    }
3158
3159    #[test]
3160    fn graph_facts_with_properties_and_confidence_metadata_round_trip() {
3161        let store = make_store();
3162        let file = make_file("f1", "src/lib.rs");
3163        let manifest = make_manifest();
3164        let files = vec![file];
3165        let data = IndexData {
3166            manifest: &manifest,
3167            files: &files,
3168            symbols: &[],
3169            occurrences: &[],
3170            chunks: &[],
3171            imports: &[],
3172            tests: &[],
3173            analysis_facts: &[],
3174        };
3175        store.replace_index(data).unwrap();
3176
3177        let node_a = GraphNode {
3178            id: NodeId::new("file:src/lib.rs"),
3179            node_type: GraphNodeType::File,
3180            label: "src/lib.rs".into(),
3181            file_id: Some(FileId::new("f1")),
3182            properties: BTreeMap::from([("package".into(), serde_json::json!("open-kioku"))]),
3183            schema_version: Some("graph-v1".into()),
3184            source_pass: Some("tree_sitter".into()),
3185            index_mode: Some("full".into()),
3186            extractor_version: Some("test-extractor".into()),
3187            ambiguity: vec!["generated file status unknown".into()],
3188            quality_notes: vec!["file path verified".into()],
3189            ..Default::default()
3190        };
3191        let node_b = GraphNode {
3192            id: NodeId::new("symbol:s1"),
3193            node_type: GraphNodeType::Function,
3194            label: "worker".into(),
3195            file_id: Some(FileId::new("f1")),
3196            symbol_id: Some(SymbolId::new("s1")),
3197            ..Default::default()
3198        };
3199        let mut edge_evidence = evidence();
3200        edge_evidence.confidence_score = Some(0.98);
3201        edge_evidence.confidence_reason = Some("exact symbol occurrence".into());
3202        edge_evidence.freshness = Some("fresh".into());
3203        let edge = GraphEdge {
3204            id: EdgeId::new("e1"),
3205            from: node_a.id.clone(),
3206            to: node_b.id.clone(),
3207            edge_type: GraphEdgeType::Defines,
3208            evidence: edge_evidence,
3209            properties: BTreeMap::from([("relation".into(), serde_json::json!("definition"))]),
3210            schema_version: Some("graph-v1".into()),
3211            source_pass: Some("scip".into()),
3212            index_mode: Some("full".into()),
3213            extractor_version: Some("test-scip".into()),
3214            ambiguity: vec!["macro expansion not modeled".into()],
3215            quality_notes: vec!["exact definition edge".into()],
3216        };
3217
3218        store
3219            .replace_graph(
3220                &[node_a.clone(), node_b.clone()],
3221                std::slice::from_ref(&edge),
3222            )
3223            .unwrap();
3224
3225        let (nodes, edges) = store.neighbors("file:src/lib.rs", 10).unwrap();
3226        let stored_node = nodes.iter().find(|node| node.id == node_a.id).unwrap();
3227        assert_eq!(stored_node.properties, node_a.properties);
3228        assert_eq!(stored_node.schema_version.as_deref(), Some("graph-v1"));
3229        assert_eq!(stored_node.source_pass.as_deref(), Some("tree_sitter"));
3230        assert_eq!(stored_node.quality_notes, vec!["file path verified"]);
3231
3232        assert_eq!(edges.len(), 1);
3233        let stored_edge = &edges[0];
3234        assert_eq!(stored_edge.properties, edge.properties);
3235        assert_eq!(stored_edge.schema_version.as_deref(), Some("graph-v1"));
3236        assert_eq!(stored_edge.evidence.confidence_score, Some(0.98));
3237        assert_eq!(
3238            stored_edge.evidence.confidence_reason.as_deref(),
3239            Some("exact symbol occurrence")
3240        );
3241        assert_eq!(stored_edge.evidence.freshness.as_deref(), Some("fresh"));
3242
3243        let indexed_confidence: String = store
3244            .connection
3245            .lock()
3246            .unwrap()
3247            .query_row(
3248                "SELECT confidence FROM graph_edges WHERE id = 'e1'",
3249                [],
3250                |row| row.get(0),
3251            )
3252            .unwrap();
3253        assert_eq!(indexed_confidence, "Medium");
3254    }
3255
3256    #[test]
3257    fn shortest_path_finds_direct_route() {
3258        let store = make_store();
3259        let file = make_file("f1", "src/lib.rs");
3260        let manifest = make_manifest();
3261        let files = vec![file];
3262        let data = IndexData {
3263            manifest: &manifest,
3264            files: &files,
3265            symbols: &[],
3266            occurrences: &[],
3267            chunks: &[],
3268            imports: &[],
3269            tests: &[],
3270            analysis_facts: &[],
3271        };
3272        store.replace_index(data).unwrap();
3273
3274        let node_a = GraphNode {
3275            id: NodeId::new("a"),
3276            node_type: GraphNodeType::File,
3277            label: "a".into(),
3278            file_id: None,
3279            symbol_id: None,
3280            ..Default::default()
3281        };
3282        let node_b = GraphNode {
3283            id: NodeId::new("b"),
3284            node_type: GraphNodeType::File,
3285            label: "b".into(),
3286            file_id: None,
3287            symbol_id: None,
3288            ..Default::default()
3289        };
3290        let edge = GraphEdge {
3291            id: EdgeId::new("a-b"),
3292            from: node_a.id.clone(),
3293            to: node_b.id.clone(),
3294            edge_type: GraphEdgeType::Defines,
3295            evidence: evidence(),
3296            ..Default::default()
3297        };
3298        store.replace_graph(&[node_a, node_b], &[edge]).unwrap();
3299
3300        let path = store.shortest_path("a", "b", 5).unwrap();
3301        assert_eq!(path.len(), 1);
3302        assert_eq!(path[0].id.0, "a-b");
3303    }
3304
3305    #[test]
3306    fn shortest_path_returns_empty_when_no_route() {
3307        let store = make_store();
3308        let file = make_file("f1", "src/lib.rs");
3309        let manifest = make_manifest();
3310        let files = vec![file];
3311        let data = IndexData {
3312            manifest: &manifest,
3313            files: &files,
3314            symbols: &[],
3315            occurrences: &[],
3316            chunks: &[],
3317            imports: &[],
3318            tests: &[],
3319            analysis_facts: &[],
3320        };
3321        store.replace_index(data).unwrap();
3322        store.replace_graph(&[], &[]).unwrap();
3323
3324        let path = store.shortest_path("x", "y", 5).unwrap();
3325        assert!(path.is_empty());
3326    }
3327
3328    #[test]
3329    fn test_old_graph_tables_migrate_and_replace_graph_backfills_columns() {
3330        let store = make_store();
3331        let legacy_file = GraphNode {
3332            id: NodeId::new("legacy_file"),
3333            node_type: GraphNodeType::File,
3334            label: "legacy.rs".into(),
3335            file_id: Some(FileId::new("f1")),
3336            ..Default::default()
3337        };
3338        let legacy_symbol = GraphNode {
3339            id: NodeId::new("legacy_symbol"),
3340            node_type: GraphNodeType::Function,
3341            label: "legacy_fn".into(),
3342            symbol_id: Some(SymbolId::new("s1")),
3343            ..Default::default()
3344        };
3345        let mut legacy_evidence = evidence();
3346        legacy_evidence.source_type = EvidenceSourceType::Scip;
3347        legacy_evidence.source = "index.scip".into();
3348        let legacy_edge = GraphEdge {
3349            id: EdgeId::new("legacy_edge"),
3350            from: legacy_file.id.clone(),
3351            to: legacy_symbol.id.clone(),
3352            edge_type: GraphEdgeType::Defines,
3353            evidence: legacy_evidence,
3354            ..Default::default()
3355        };
3356        {
3357            let conn = store.connection.lock().unwrap();
3358            conn.execute("DROP TABLE graph_nodes", []).unwrap();
3359            conn.execute("DROP TABLE graph_edges", []).unwrap();
3360            conn.execute(
3361                "CREATE TABLE graph_nodes(id TEXT PRIMARY KEY, label TEXT, json TEXT)",
3362                [],
3363            )
3364            .unwrap();
3365            conn.execute("CREATE TABLE graph_edges(id TEXT PRIMARY KEY, from_id TEXT, to_id TEXT, edge_type TEXT, json TEXT)", []).unwrap();
3366            conn.execute(
3367                "INSERT INTO graph_nodes(id, label, json) VALUES(?1, ?2, ?3)",
3368                params![
3369                    legacy_file.id.0.as_str(),
3370                    legacy_file.label.as_str(),
3371                    serde_json::to_string(&legacy_file).unwrap(),
3372                ],
3373            )
3374            .unwrap();
3375            conn.execute(
3376                "INSERT INTO graph_nodes(id, label, json) VALUES(?1, ?2, ?3)",
3377                params![
3378                    legacy_symbol.id.0.as_str(),
3379                    legacy_symbol.label.as_str(),
3380                    serde_json::to_string(&legacy_symbol).unwrap(),
3381                ],
3382            )
3383            .unwrap();
3384            conn.execute(
3385                "INSERT INTO graph_edges(id, from_id, to_id, edge_type, json)
3386                 VALUES(?1, ?2, ?3, '', ?4)",
3387                params![
3388                    legacy_edge.id.0.as_str(),
3389                    legacy_edge.from.0.as_str(),
3390                    legacy_edge.to.0.as_str(),
3391                    serde_json::to_string(&legacy_edge).unwrap(),
3392                ],
3393            )
3394            .unwrap();
3395        }
3396        store.initialize().unwrap();
3397        store.initialize().unwrap();
3398
3399        let migrated_nodes = store.nodes_by_type(GraphNodeType::File, 10, 0).unwrap();
3400        assert_eq!(migrated_nodes.len(), 1);
3401        assert_eq!(migrated_nodes[0].id.0, "legacy_file");
3402
3403        let migrated_edges = store.edges_by_type(GraphEdgeType::Defines, 10, 0).unwrap();
3404        assert_eq!(migrated_edges.len(), 1);
3405        assert_eq!(migrated_edges[0].id.0, "legacy_edge");
3406        let migrated_between = store
3407            .graph_edges_between("legacy_file", "legacy_symbol", 10)
3408            .unwrap();
3409        assert_eq!(migrated_between.len(), 1);
3410
3411        let migrated_counts = store.graph_schema_counts().unwrap();
3412        assert_eq!(migrated_counts.node_types.get("File"), Some(&1));
3413        assert_eq!(migrated_counts.edge_types.get("Defines"), Some(&1));
3414
3415        let node = GraphNode {
3416            id: NodeId::new("test_node"),
3417            node_type: GraphNodeType::File,
3418            label: "test".into(),
3419            ..Default::default()
3420        };
3421        store.replace_graph(&[node], &[]).unwrap();
3422
3423        let count: i64 = store
3424            .connection
3425            .lock()
3426            .unwrap()
3427            .query_row(
3428                "SELECT COUNT(*) FROM graph_nodes WHERE node_type = 'File'",
3429                [],
3430                |r| r.get(0),
3431            )
3432            .unwrap();
3433        assert_eq!(count, 1);
3434
3435        let version: i64 = store
3436            .connection
3437            .lock()
3438            .unwrap()
3439            .pragma_query_value(None, "user_version", |row| row.get(0))
3440            .unwrap();
3441        assert_eq!(version, SQLITE_GRAPH_SCHEMA_VERSION);
3442
3443        let index_count: i64 = store
3444            .connection
3445            .lock()
3446            .unwrap()
3447            .query_row(
3448                "SELECT COUNT(*) FROM sqlite_master
3449                 WHERE type = 'index'
3450                   AND name IN (
3451                     'idx_graph_nodes_type',
3452                     'idx_graph_nodes_file',
3453                     'idx_graph_nodes_symbol',
3454                     'idx_graph_edges_type',
3455                     'idx_graph_edges_from_type',
3456                     'idx_graph_edges_to_type',
3457                     'idx_graph_edges_source_type'
3458                   )",
3459                [],
3460                |row| row.get(0),
3461            )
3462            .unwrap();
3463        assert_eq!(index_count, 7);
3464    }
3465
3466    #[test]
3467    fn test_nodes_by_type_uses_indexed_column() {
3468        let store = make_store();
3469        let node1 = GraphNode {
3470            id: NodeId::new("n1"),
3471            node_type: GraphNodeType::File,
3472            ..Default::default()
3473        };
3474        let node2 = GraphNode {
3475            id: NodeId::new("n2"),
3476            node_type: GraphNodeType::File,
3477            ..Default::default()
3478        };
3479        let node3 = GraphNode {
3480            id: NodeId::new("n3"),
3481            node_type: GraphNodeType::Function,
3482            ..Default::default()
3483        };
3484        store
3485            .replace_graph(&[node2.clone(), node3.clone(), node1.clone()], &[])
3486            .unwrap();
3487
3488        let nodes = store.nodes_by_type(GraphNodeType::File, 10, 0).unwrap();
3489        assert_eq!(nodes.len(), 2);
3490        assert_eq!(nodes[0].id.0, "n1");
3491        assert_eq!(nodes[1].id.0, "n2");
3492    }
3493
3494    #[test]
3495    fn test_edges_by_type_uses_indexed_column() {
3496        let store = make_store();
3497        let node1 = GraphNode {
3498            id: NodeId::new("n1"),
3499            ..Default::default()
3500        };
3501        let node2 = GraphNode {
3502            id: NodeId::new("n2"),
3503            ..Default::default()
3504        };
3505        let edge1 = GraphEdge {
3506            id: EdgeId::new("e1"),
3507            from: NodeId::new("n1"),
3508            to: NodeId::new("n2"),
3509            edge_type: GraphEdgeType::Calls,
3510            ..Default::default()
3511        };
3512        let edge2 = GraphEdge {
3513            id: EdgeId::new("e2"),
3514            from: NodeId::new("n1"),
3515            to: NodeId::new("n2"),
3516            edge_type: GraphEdgeType::Calls,
3517            ..Default::default()
3518        };
3519        let edge3 = GraphEdge {
3520            id: EdgeId::new("e3"),
3521            from: NodeId::new("n1"),
3522            to: NodeId::new("n2"),
3523            edge_type: GraphEdgeType::Defines,
3524            ..Default::default()
3525        };
3526        store
3527            .replace_graph(
3528                &[node1, node2],
3529                &[edge2.clone(), edge3.clone(), edge1.clone()],
3530            )
3531            .unwrap();
3532
3533        let edges = store.edges_by_type(GraphEdgeType::Calls, 10, 0).unwrap();
3534        assert_eq!(edges.len(), 2);
3535        assert_eq!(edges[0].id.0, "e1");
3536        assert_eq!(edges[1].id.0, "e2");
3537    }
3538
3539    #[test]
3540    fn test_graph_edges_between_respects_limit() {
3541        let store = make_store();
3542        let node1 = GraphNode {
3543            id: NodeId::new("n1"),
3544            ..Default::default()
3545        };
3546        let node2 = GraphNode {
3547            id: NodeId::new("n2"),
3548            ..Default::default()
3549        };
3550        let edge1 = GraphEdge {
3551            id: EdgeId::new("e1"),
3552            from: NodeId::new("n1"),
3553            to: NodeId::new("n2"),
3554            ..Default::default()
3555        };
3556        let edge2 = GraphEdge {
3557            id: EdgeId::new("e2"),
3558            from: NodeId::new("n1"),
3559            to: NodeId::new("n2"),
3560            ..Default::default()
3561        };
3562        store
3563            .replace_graph(&[node1, node2], &[edge2.clone(), edge1.clone()])
3564            .unwrap();
3565
3566        let edges = store.graph_edges_between("n1", "n2", 1).unwrap();
3567        assert_eq!(edges.len(), 1);
3568        assert_eq!(edges[0].id.0, "e1");
3569    }
3570
3571    #[test]
3572    fn test_query_limit_is_capped() {
3573        assert_eq!(super::clamp_limit(0), 100);
3574        assert_eq!(super::clamp_limit(5), 5);
3575        assert_eq!(super::clamp_limit(5000), 1000);
3576    }
3577
3578    #[test]
3579    fn test_graph_schema_counts_returns_sorted_type_counts() {
3580        let store = make_store();
3581        let node1 = GraphNode {
3582            id: NodeId::new("n1"),
3583            node_type: GraphNodeType::File,
3584            ..Default::default()
3585        };
3586        let node2 = GraphNode {
3587            id: NodeId::new("n2"),
3588            node_type: GraphNodeType::File,
3589            ..Default::default()
3590        };
3591        let node3 = GraphNode {
3592            id: NodeId::new("n3"),
3593            node_type: GraphNodeType::Function,
3594            ..Default::default()
3595        };
3596        let edge1 = GraphEdge {
3597            id: EdgeId::new("e1"),
3598            from: NodeId::new("n1"),
3599            to: NodeId::new("n2"),
3600            edge_type: GraphEdgeType::Calls,
3601            ..Default::default()
3602        };
3603        store
3604            .replace_graph(&[node1, node2, node3], &[edge1])
3605            .unwrap();
3606
3607        let counts = store.graph_schema_counts().unwrap();
3608        assert_eq!(counts.node_types.get("File"), Some(&2));
3609        assert_eq!(counts.node_types.get("Function"), Some(&1));
3610        assert_eq!(counts.edge_types.get("Calls"), Some(&1));
3611    }
3612
3613    #[test]
3614    fn test_graph_counts_returns_total_nodes_and_edges() {
3615        let store = make_store();
3616        let node1 = GraphNode {
3617            id: NodeId::new("n1"),
3618            node_type: GraphNodeType::File,
3619            ..Default::default()
3620        };
3621        let node2 = GraphNode {
3622            id: NodeId::new("n2"),
3623            node_type: GraphNodeType::File,
3624            ..Default::default()
3625        };
3626        let edge1 = GraphEdge {
3627            id: EdgeId::new("e1"),
3628            from: NodeId::new("n1"),
3629            to: NodeId::new("n2"),
3630            edge_type: GraphEdgeType::Calls,
3631            ..Default::default()
3632        };
3633        store.replace_graph(&[node1, node2], &[edge1]).unwrap();
3634
3635        let overall = store.graph_counts().unwrap();
3636        assert_eq!(overall.nodes, 2);
3637        assert_eq!(overall.edges, 1);
3638    }
3639}