Skip to main content

open_kioku_storage_sqlite/
lib.rs

1use chrono::{DateTime, Utc};
2use open_kioku_core::{
3    AnalysisFact, ChurnEntityKind, ChurnStats, ChurnSummary, CodeChunk, Confidence,
4    EvidenceSourceType, File, FileId, FileProvenance, GitCochangeEdge, GitCommitId,
5    GitCommitRecord, GitFileTouch, GitSymbolTouch, GraphEdge, GraphEdgeType, GraphNode,
6    GraphNodeType, HistoricalChangeSummary, HistoryRecordId, HistorySnapshot, HistorySummary,
7    Import, IndexManifest, ProvenanceTouch, SimilarChangeHit, SimilarChangeQuery,
8    SimilarChangeReport, SimilarityEvidence, SimilarityEvidenceSource, Symbol, SymbolId,
9    SymbolOccurrence, SymbolProvenance, TestTarget, HISTORY_SCHEMA_VERSION,
10};
11use open_kioku_errors::{OkError, Result};
12use open_kioku_storage::{
13    GraphCounts, GraphSchemaCounts, GraphStore, HistoryStore, IndexData, MetadataStore,
14    PartialIndexUpdate,
15};
16use rusqlite::{params, Connection, OptionalExtension, Transaction};
17use std::collections::{BTreeMap, BTreeSet};
18use std::path::{Path, PathBuf};
19use std::sync::Mutex;
20
/// Version number of the SQLite history schema.
// NOTE(review): presumably checked by `migrate_history_schema`, which is not visible here — confirm.
const SQLITE_HISTORY_SCHEMA_VERSION: i64 = 1;
/// Index schema version this crate supports; the only version constant exported publicly.
pub const SQLITE_SUPPORTED_INDEX_SCHEMA_VERSION: i64 = 2;
/// Graph schema version; defined as an alias of the supported index schema version.
const SQLITE_GRAPH_SCHEMA_VERSION: i64 = SQLITE_SUPPORTED_INDEX_SCHEMA_VERSION;
/// Overall supported schema version; also an alias of the supported index schema version.
const SQLITE_SUPPORTED_SCHEMA_VERSION: i64 = SQLITE_SUPPORTED_INDEX_SCHEMA_VERSION;
25
/// DDL batch for version 1 of the git-history tables.
///
/// Creates six tables — `git_commits`, `git_file_touches`, `git_symbol_touches`,
/// `git_cochange_edges`, `git_review_events` and `history_hotspots` — plus their
/// lookup indexes. Every statement uses `IF NOT EXISTS`. Each table keeps a few
/// queryable columns next to a `json` column holding the serialized record.
///
/// The two touch tables reference `git_commits(id)` with `ON DELETE CASCADE`;
/// `git_review_events.commit_id` is a plain nullable column with no foreign key.
const HISTORY_SCHEMA_V1: &str = r#"
CREATE TABLE IF NOT EXISTS git_commits (
  id TEXT PRIMARY KEY,
  authored_at TEXT NOT NULL,
  committed_at TEXT NOT NULL,
  author_email TEXT,
  json TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_git_commits_committed_at
  ON git_commits(committed_at DESC, id);
CREATE INDEX IF NOT EXISTS idx_git_commits_author_email
  ON git_commits(author_email);

CREATE TABLE IF NOT EXISTS git_file_touches (
  id TEXT PRIMARY KEY,
  commit_id TEXT NOT NULL,
  path TEXT NOT NULL,
  previous_path TEXT,
  touched_at TEXT NOT NULL,
  json TEXT NOT NULL,
  FOREIGN KEY(commit_id) REFERENCES git_commits(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_git_file_touches_path
  ON git_file_touches(path, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_file_touches_previous_path
  ON git_file_touches(previous_path, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_file_touches_commit
  ON git_file_touches(commit_id);

CREATE TABLE IF NOT EXISTS git_symbol_touches (
  id TEXT PRIMARY KEY,
  commit_id TEXT NOT NULL,
  symbol_id TEXT,
  qualified_name TEXT NOT NULL,
  file_path TEXT NOT NULL,
  touched_at TEXT NOT NULL,
  json TEXT NOT NULL,
  FOREIGN KEY(commit_id) REFERENCES git_commits(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_git_symbol_touches_file
  ON git_symbol_touches(file_path, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_symbol_touches_symbol
  ON git_symbol_touches(symbol_id, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_symbol_touches_commit
  ON git_symbol_touches(commit_id);

CREATE TABLE IF NOT EXISTS git_cochange_edges (
  id TEXT PRIMARY KEY,
  path TEXT NOT NULL,
  cochanged_path TEXT NOT NULL,
  commit_count INTEGER NOT NULL,
  recency_weight REAL NOT NULL,
  last_changed_at TEXT,
  json TEXT NOT NULL,
  UNIQUE(path, cochanged_path)
);
CREATE INDEX IF NOT EXISTS idx_git_cochange_edges_path
  ON git_cochange_edges(path, recency_weight DESC, commit_count DESC);

CREATE TABLE IF NOT EXISTS git_review_events (
  id TEXT PRIMARY KEY,
  commit_id TEXT,
  path TEXT,
  reviewer_identity TEXT NOT NULL,
  observed_at TEXT NOT NULL,
  json TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_git_review_events_path
  ON git_review_events(path, observed_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_review_events_commit
  ON git_review_events(commit_id, observed_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_review_events_reviewer
  ON git_review_events(reviewer_identity, observed_at DESC);

CREATE TABLE IF NOT EXISTS history_hotspots (
  entity_kind TEXT NOT NULL,
  entity_key TEXT NOT NULL,
  path TEXT,
  symbol_id TEXT,
  qualified_name TEXT,
  hotspot_score REAL NOT NULL,
  touch_count INTEGER NOT NULL,
  generated_at TEXT NOT NULL,
  json TEXT NOT NULL,
  PRIMARY KEY(entity_kind, entity_key)
);
CREATE INDEX IF NOT EXISTS idx_history_hotspots_kind_score
  ON history_hotspots(entity_kind, hotspot_score DESC, touch_count DESC, entity_key);
CREATE INDEX IF NOT EXISTS idx_history_hotspots_path
  ON history_hotspots(path);
CREATE INDEX IF NOT EXISTS idx_history_hotspots_symbol
  ON history_hotspots(symbol_id);
"#;
119
/// Store backed by a single SQLite database file.
///
/// All access goes through one `rusqlite::Connection` guarded by a `Mutex`;
/// each method locks it for the duration of the call.
pub struct SqliteStore {
    /// Path of the database file, as supplied when the store was opened.
    path: PathBuf,
    /// The shared connection; a poisoned lock is reported as a storage error.
    connection: Mutex<Connection>,
}
124
125impl SqliteStore {
126    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
127        let path = path.as_ref().to_path_buf();
128        if let Some(parent) = path.parent() {
129            std::fs::create_dir_all(parent)?;
130        }
131        let connection = Connection::open_with_flags(
132            &path,
133            rusqlite::OpenFlags::SQLITE_OPEN_READ_WRITE
134                | rusqlite::OpenFlags::SQLITE_OPEN_CREATE
135                | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
136        )
137        .map_err(storage_err)?;
138        let store = Self {
139            path,
140            connection: Mutex::new(connection),
141        };
142        store.initialize()?;
143        Ok(store)
144    }
145
146    pub fn path(&self) -> &Path {
147        &self.path
148    }
149
150    fn churn_by_kind_and_key<F>(
151        &self,
152        kind: ChurnEntityKind,
153        key: &str,
154        missing: F,
155    ) -> Result<ChurnSummary>
156    where
157        F: FnOnce() -> ChurnSummary,
158    {
159        let conn = self
160            .connection
161            .lock()
162            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
163        let raw = conn
164            .query_row(
165                "SELECT json FROM history_hotspots WHERE entity_kind = ?1 AND entity_key = ?2",
166                params![churn_entity_kind_key(kind), key],
167                |row| row.get::<_, String>(0),
168            )
169            .optional()
170            .map_err(storage_err)?;
171        match raw {
172            Some(raw) => Ok(serde_json::from_str(&raw)?),
173            None => Ok(missing()),
174        }
175    }
176}
177
178impl MetadataStore for SqliteStore {
179    fn initialize(&self) -> Result<()> {
180        let mut conn = self
181            .connection
182            .lock()
183            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
184        ensure_supported_sqlite_schema(&conn)?;
185        conn.execute_batch(
186            r#"
187            PRAGMA journal_mode = WAL;
188            PRAGMA foreign_keys = ON;
189            CREATE TABLE IF NOT EXISTS manifests (
190              id INTEGER PRIMARY KEY CHECK (id = 1),
191              json TEXT NOT NULL
192            );
193            CREATE TABLE IF NOT EXISTS files (
194              id TEXT PRIMARY KEY,
195              path TEXT NOT NULL UNIQUE,
196              json TEXT NOT NULL
197            );
198            CREATE TABLE IF NOT EXISTS symbols (
199              id TEXT PRIMARY KEY,
200              name TEXT NOT NULL,
201              qualified_name TEXT NOT NULL,
202              file_id TEXT NOT NULL,
203              json TEXT NOT NULL
204            );
205            CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
206            CREATE TABLE IF NOT EXISTS chunks (
207              id TEXT PRIMARY KEY,
208              file_id TEXT NOT NULL,
209              start_line INTEGER NOT NULL,
210              end_line INTEGER NOT NULL,
211              text TEXT NOT NULL,
212              json TEXT NOT NULL
213            );
214            CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_id);
215            CREATE TABLE IF NOT EXISTS tests (
216              id TEXT PRIMARY KEY,
217              file_id TEXT NOT NULL,
218              json TEXT NOT NULL
219            );
220            CREATE INDEX IF NOT EXISTS idx_tests_file ON tests(file_id);
221            CREATE TABLE IF NOT EXISTS imports (
222              id TEXT PRIMARY KEY,
223              file_id TEXT NOT NULL,
224              imported TEXT NOT NULL,
225              json TEXT NOT NULL
226            );
227            CREATE INDEX IF NOT EXISTS idx_imports_file ON imports(file_id);
228            CREATE TABLE IF NOT EXISTS occurrences (
229              id TEXT PRIMARY KEY,
230              symbol_id TEXT NOT NULL,
231              file_id TEXT NOT NULL,
232              is_definition INTEGER NOT NULL,
233              json TEXT NOT NULL
234            );
235            CREATE INDEX IF NOT EXISTS idx_occurrences_symbol ON occurrences(symbol_id);
236            CREATE INDEX IF NOT EXISTS idx_occurrences_file ON occurrences(file_id);
237            CREATE TABLE IF NOT EXISTS analysis_facts (
238              id TEXT PRIMARY KEY,
239              file_id TEXT NOT NULL,
240              source_type TEXT NOT NULL,
241              target TEXT NOT NULL,
242              json TEXT NOT NULL
243            );
244            CREATE INDEX IF NOT EXISTS idx_analysis_facts_file ON analysis_facts(file_id);
245            CREATE INDEX IF NOT EXISTS idx_analysis_facts_source ON analysis_facts(source_type);
246            CREATE TABLE IF NOT EXISTS vector_targets (
247              id TEXT PRIMARY KEY,
248              file_id TEXT NOT NULL,
249              target_kind TEXT NOT NULL,
250              content_hash TEXT NOT NULL,
251              vector_id INTEGER NOT NULL,
252              model TEXT NOT NULL,
253              dimensions INTEGER NOT NULL,
254              json TEXT NOT NULL
255            );
256            CREATE INDEX IF NOT EXISTS idx_vector_targets_file ON vector_targets(file_id);
257            CREATE TABLE IF NOT EXISTS embedding_cache (
258              cache_key TEXT PRIMARY KEY,
259              target_id TEXT NOT NULL,
260              content_hash TEXT NOT NULL,
261              model TEXT NOT NULL,
262              dimensions INTEGER NOT NULL,
263              json TEXT NOT NULL
264            );
265            CREATE TABLE IF NOT EXISTS semantic_index_runs (
266              id TEXT PRIMARY KEY,
267              status TEXT NOT NULL,
268              model TEXT NOT NULL,
269              dimensions INTEGER NOT NULL,
270              vector_count INTEGER NOT NULL,
271              created_at TEXT NOT NULL,
272              json TEXT NOT NULL
273            );
274            CREATE TABLE IF NOT EXISTS semantic_coverage (
275              id TEXT PRIMARY KEY,
276              target_kind TEXT NOT NULL,
277              indexed_count INTEGER NOT NULL,
278              stale_count INTEGER NOT NULL,
279              failed_count INTEGER NOT NULL,
280              json TEXT NOT NULL
281            );
282            CREATE TABLE IF NOT EXISTS graph_nodes (
283              id TEXT PRIMARY KEY,
284              label TEXT NOT NULL,
285              node_type TEXT DEFAULT '',
286              file_id TEXT DEFAULT '',
287              symbol_id TEXT DEFAULT '',
288              json TEXT NOT NULL
289            );
290            CREATE TABLE IF NOT EXISTS graph_edges (
291              id TEXT PRIMARY KEY,
292              from_id TEXT NOT NULL,
293              to_id TEXT NOT NULL,
294              edge_type TEXT NOT NULL,
295              confidence TEXT DEFAULT '',
296              source_type TEXT DEFAULT '',
297              source_file TEXT DEFAULT '',
298              json TEXT NOT NULL
299            );
300            CREATE INDEX IF NOT EXISTS idx_graph_edges_from ON graph_edges(from_id);
301            CREATE INDEX IF NOT EXISTS idx_graph_edges_to ON graph_edges(to_id);
302            "#,
303        )
304        .map_err(storage_err)?;
305        migrate_history_schema(&mut conn)?;
306        migrate_graph_schema(&mut conn)?;
307        Ok(())
308    }
309
310    fn put_manifest(&self, manifest: &IndexManifest) -> Result<()> {
311        let conn = self
312            .connection
313            .lock()
314            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
315        let json = serde_json::to_string(manifest)?;
316        conn.execute(
317            "INSERT INTO manifests(id, json) VALUES(1, ?1) ON CONFLICT(id) DO UPDATE SET json = excluded.json",
318            params![json],
319        )
320        .map_err(storage_err)?;
321        Ok(())
322    }
323
324    fn manifest(&self) -> Result<Option<IndexManifest>> {
325        let conn = self
326            .connection
327            .lock()
328            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
329        let raw: Option<String> = conn
330            .query_row("SELECT json FROM manifests WHERE id = 1", [], |row| {
331                row.get(0)
332            })
333            .optional()
334            .map_err(storage_err)?;
335        raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
336            .transpose()
337    }
338
339    fn replace_index(&self, data: IndexData<'_>) -> Result<()> {
340        let mut conn = self
341            .connection
342            .lock()
343            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
344        let tx = conn.transaction().map_err(storage_err)?;
345        tx.execute("DELETE FROM occurrences", [])
346            .map_err(storage_err)?;
347        tx.execute("DELETE FROM analysis_facts", [])
348            .map_err(storage_err)?;
349        tx.execute("DELETE FROM imports", []).map_err(storage_err)?;
350        tx.execute("DELETE FROM tests", []).map_err(storage_err)?;
351        tx.execute("DELETE FROM chunks", []).map_err(storage_err)?;
352        tx.execute("DELETE FROM symbols", []).map_err(storage_err)?;
353        tx.execute("DELETE FROM files", []).map_err(storage_err)?;
354        tx.execute("DELETE FROM manifests", [])
355            .map_err(storage_err)?;
356        tx.execute(
357            "INSERT INTO manifests(id, json) VALUES(1, ?1)",
358            params![serde_json::to_string(data.manifest)?],
359        )
360        .map_err(storage_err)?;
361        insert_index_rows(
362            &tx,
363            IndexRows {
364                files: data.files,
365                symbols: data.symbols,
366                chunks: data.chunks,
367                tests: data.tests,
368                imports: data.imports,
369                occurrences: data.occurrences,
370                analysis_facts: data.analysis_facts,
371            },
372        )?;
373        tx.commit().map_err(storage_err)?;
374        Ok(())
375    }
376
377    fn replace_files_index(&self, update: PartialIndexUpdate<'_>) -> Result<()> {
378        let mut conn = self
379            .connection
380            .lock()
381            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
382        let tx = conn.transaction().map_err(storage_err)?;
383        let affected_file_ids = update
384            .changed_files
385            .iter()
386            .map(|file| file.id.clone())
387            .chain(update.deleted_file_ids.iter().cloned())
388            .collect::<BTreeSet<_>>();
389        let mut affected_file_paths = update
390            .changed_files
391            .iter()
392            .map(|file| file.path.to_string_lossy().to_string())
393            .collect::<BTreeSet<_>>();
394        for file_id in &affected_file_ids {
395            let path: Option<String> = tx
396                .query_row(
397                    "SELECT path FROM files WHERE id = ?1",
398                    params![&file_id.0],
399                    |row| row.get(0),
400                )
401                .optional()
402                .map_err(storage_err)?;
403            if let Some(path) = path {
404                affected_file_paths.insert(path);
405            }
406        }
407
408        let mut affected_symbol_ids = update
409            .symbols
410            .iter()
411            .map(|symbol| symbol.id.clone())
412            .collect::<BTreeSet<_>>();
413        for file_id in &affected_file_ids {
414            let mut stmt = tx
415                .prepare("SELECT id FROM symbols WHERE file_id = ?1")
416                .map_err(storage_err)?;
417            let rows = stmt
418                .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
419                .map_err(storage_err)?;
420            for row in rows {
421                affected_symbol_ids.insert(SymbolId::new(row.map_err(storage_err)?));
422            }
423        }
424
425        let mut affected_node_ids = update
426            .graph_nodes
427            .iter()
428            .map(|node| node.id.0.clone())
429            .collect::<BTreeSet<_>>();
430        for file_id in &affected_file_ids {
431            let mut stmt = tx
432                .prepare("SELECT id FROM graph_nodes WHERE file_id = ?1")
433                .map_err(storage_err)?;
434            let rows = stmt
435                .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
436                .map_err(storage_err)?;
437            for row in rows {
438                affected_node_ids.insert(row.map_err(storage_err)?);
439            }
440        }
441        for symbol_id in &affected_symbol_ids {
442            let mut stmt = tx
443                .prepare("SELECT id FROM graph_nodes WHERE symbol_id = ?1")
444                .map_err(storage_err)?;
445            let rows = stmt
446                .query_map(params![&symbol_id.0], |row| row.get::<_, String>(0))
447                .map_err(storage_err)?;
448            for row in rows {
449                affected_node_ids.insert(row.map_err(storage_err)?);
450            }
451        }
452
453        tx.execute(
454            "INSERT INTO manifests(id, json) VALUES(1, ?1)
455             ON CONFLICT(id) DO UPDATE SET json = excluded.json",
456            params![serde_json::to_string(update.manifest)?],
457        )
458        .map_err(storage_err)?;
459
460        for node_id in &affected_node_ids {
461            tx.execute(
462                "DELETE FROM graph_edges WHERE from_id = ?1 OR to_id = ?1",
463                params![node_id],
464            )
465            .map_err(storage_err)?;
466        }
467        for path in &affected_file_paths {
468            tx.execute(
469                "DELETE FROM graph_edges WHERE source_file = ?1",
470                params![path],
471            )
472            .map_err(storage_err)?;
473        }
474        for node_id in &affected_node_ids {
475            tx.execute("DELETE FROM graph_nodes WHERE id = ?1", params![node_id])
476                .map_err(storage_err)?;
477        }
478        for file_id in &affected_file_ids {
479            tx.execute(
480                "DELETE FROM graph_nodes WHERE file_id = ?1",
481                params![&file_id.0],
482            )
483            .map_err(storage_err)?;
484        }
485        for symbol_id in &affected_symbol_ids {
486            tx.execute(
487                "DELETE FROM graph_nodes WHERE symbol_id = ?1",
488                params![&symbol_id.0],
489            )
490            .map_err(storage_err)?;
491        }
492
493        for symbol_id in &affected_symbol_ids {
494            tx.execute(
495                "DELETE FROM occurrences WHERE symbol_id = ?1",
496                params![&symbol_id.0],
497            )
498            .map_err(storage_err)?;
499        }
500        for file_id in &affected_file_ids {
501            tx.execute(
502                "DELETE FROM occurrences WHERE file_id = ?1",
503                params![&file_id.0],
504            )
505            .map_err(storage_err)?;
506            tx.execute(
507                "DELETE FROM analysis_facts WHERE file_id = ?1",
508                params![&file_id.0],
509            )
510            .map_err(storage_err)?;
511            tx.execute(
512                "DELETE FROM imports WHERE file_id = ?1",
513                params![&file_id.0],
514            )
515            .map_err(storage_err)?;
516            tx.execute("DELETE FROM tests WHERE file_id = ?1", params![&file_id.0])
517                .map_err(storage_err)?;
518            tx.execute("DELETE FROM chunks WHERE file_id = ?1", params![&file_id.0])
519                .map_err(storage_err)?;
520            tx.execute(
521                "DELETE FROM symbols WHERE file_id = ?1",
522                params![&file_id.0],
523            )
524            .map_err(storage_err)?;
525            tx.execute("DELETE FROM files WHERE id = ?1", params![&file_id.0])
526                .map_err(storage_err)?;
527        }
528
529        insert_index_rows(
530            &tx,
531            IndexRows {
532                files: update.changed_files,
533                symbols: update.symbols,
534                chunks: update.chunks,
535                tests: update.tests,
536                imports: update.imports,
537                occurrences: update.occurrences,
538                analysis_facts: update.analysis_facts,
539            },
540        )?;
541        insert_graph_rows(&tx, update.graph_nodes, update.graph_edges)?;
542        tx.commit().map_err(storage_err)?;
543        Ok(())
544    }
545
546    fn list_files(&self, limit: usize, offset: usize) -> Result<Vec<File>> {
547        let conn = self
548            .connection
549            .lock()
550            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
551        let mut stmt = conn
552            .prepare("SELECT json FROM files ORDER BY path LIMIT ?1 OFFSET ?2")
553            .map_err(storage_err)?;
554        let rows = stmt
555            .query_map(params![limit as i64, offset as i64], |row| {
556                row.get::<_, String>(0)
557            })
558            .map_err(storage_err)?;
559        collect_json(rows)
560    }
561
562    fn get_file_by_path(&self, path: &Path) -> Result<Option<File>> {
563        let conn = self
564            .connection
565            .lock()
566            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
567        let raw: Option<String> = conn
568            .query_row(
569                "SELECT json FROM files WHERE path = ?1",
570                params![path.to_string_lossy().as_ref()],
571                |row| row.get(0),
572            )
573            .optional()
574            .map_err(storage_err)?;
575        raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
576            .transpose()
577    }
578
579    fn list_symbols(
580        &self,
581        query: Option<&str>,
582        limit: usize,
583        offset: usize,
584    ) -> Result<Vec<Symbol>> {
585        let conn = self
586            .connection
587            .lock()
588            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
589        let pattern = format!("%{}%", query.unwrap_or_default());
590        let mut stmt = conn
591            .prepare(
592                "SELECT json FROM symbols WHERE (?1 = '%%' OR name LIKE ?1 COLLATE NOCASE OR qualified_name LIKE ?1 COLLATE NOCASE) ORDER BY qualified_name LIMIT ?2 OFFSET ?3",
593            )
594            .map_err(storage_err)?;
595        let rows = stmt
596            .query_map(params![pattern, limit as i64, offset as i64], |row| {
597                row.get::<_, String>(0)
598            })
599            .map_err(storage_err)?;
600        collect_json(rows)
601    }
602
603    fn symbol_by_id(&self, id: &SymbolId) -> Result<Option<Symbol>> {
604        let conn = self
605            .connection
606            .lock()
607            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
608        let raw: Option<String> = conn
609            .query_row(
610                "SELECT json FROM symbols WHERE id = ?1",
611                params![&id.0],
612                |row| row.get(0),
613            )
614            .optional()
615            .map_err(storage_err)?;
616        raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
617            .transpose()
618    }
619
620    fn chunks_for_file(&self, file_id: &FileId) -> Result<Vec<CodeChunk>> {
621        let conn = self
622            .connection
623            .lock()
624            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
625        let mut stmt = conn
626            .prepare("SELECT json FROM chunks WHERE file_id = ?1 ORDER BY start_line")
627            .map_err(storage_err)?;
628        let rows = stmt
629            .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
630            .map_err(storage_err)?;
631        collect_json(rows)
632    }
633
634    fn all_chunks(&self) -> Result<Vec<CodeChunk>> {
635        let conn = self
636            .connection
637            .lock()
638            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
639        let mut stmt = conn
640            .prepare("SELECT json FROM chunks ORDER BY file_id, start_line")
641            .map_err(storage_err)?;
642        let rows = stmt
643            .query_map([], |row| row.get::<_, String>(0))
644            .map_err(storage_err)?;
645        collect_json(rows)
646    }
647
648    fn tests(&self) -> Result<Vec<TestTarget>> {
649        let conn = self
650            .connection
651            .lock()
652            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
653        let mut stmt = conn
654            .prepare("SELECT json FROM tests ORDER BY file_id")
655            .map_err(storage_err)?;
656        let rows = stmt
657            .query_map([], |row| row.get::<_, String>(0))
658            .map_err(storage_err)?;
659        collect_json(rows)
660    }
661
662    fn imports(&self) -> Result<Vec<Import>> {
663        let conn = self
664            .connection
665            .lock()
666            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
667        let mut stmt = conn
668            .prepare("SELECT json FROM imports ORDER BY file_id")
669            .map_err(storage_err)?;
670        let rows = stmt
671            .query_map([], |row| row.get::<_, String>(0))
672            .map_err(storage_err)?;
673        collect_json(rows)
674    }
675
676    fn analysis_facts(
677        &self,
678        source_type: Option<EvidenceSourceType>,
679        limit: usize,
680    ) -> Result<Vec<AnalysisFact>> {
681        let conn = self
682            .connection
683            .lock()
684            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
685        let limit = limit.min(i64::MAX as usize) as i64;
686        let rows = if let Some(source_type) = source_type {
687            let mut stmt = conn
688                .prepare(
689                    "SELECT json FROM analysis_facts WHERE source_type = ?1 ORDER BY file_id, target LIMIT ?2",
690                )
691                .map_err(storage_err)?;
692            let rows = stmt
693                .query_map(params![source_type_name(&source_type), limit], |row| {
694                    row.get::<_, String>(0)
695                })
696                .map_err(storage_err)?;
697            collect_json(rows)?
698        } else {
699            let mut stmt = conn
700                .prepare("SELECT json FROM analysis_facts ORDER BY file_id, target LIMIT ?1")
701                .map_err(storage_err)?;
702            let rows = stmt
703                .query_map(params![limit], |row| row.get::<_, String>(0))
704                .map_err(storage_err)?;
705            collect_json(rows)?
706        };
707        Ok(rows)
708    }
709
710    fn references_for_symbol(&self, id: &SymbolId, limit: usize) -> Result<Vec<SymbolOccurrence>> {
711        let conn = self
712            .connection
713            .lock()
714            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
715        let mut stmt = conn
716            .prepare(
717                "SELECT json FROM occurrences WHERE symbol_id = ?1 AND is_definition = 0 ORDER BY file_id LIMIT ?2",
718            )
719            .map_err(storage_err)?;
720        let rows = stmt
721            .query_map(params![&id.0, limit as i64], |row| row.get::<_, String>(0))
722            .map_err(storage_err)?;
723        collect_json(rows)
724    }
725
726    fn occurrences_for_file(&self, file_id: &FileId) -> Result<Vec<SymbolOccurrence>> {
727        let conn = self
728            .connection
729            .lock()
730            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
731        let mut stmt = conn
732            .prepare("SELECT json FROM occurrences WHERE file_id = ?1 ORDER BY symbol_id")
733            .map_err(storage_err)?;
734        let rows = stmt
735            .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
736            .map_err(storage_err)?;
737        collect_json(rows)
738    }
739
740    fn symbols_for_file(&self, file_id: &FileId) -> Result<Vec<Symbol>> {
741        let conn = self
742            .connection
743            .lock()
744            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
745        let mut stmt = conn
746            .prepare("SELECT json FROM symbols WHERE file_id = ?1 ORDER BY name")
747            .map_err(storage_err)?;
748        let rows = stmt
749            .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
750            .map_err(storage_err)?;
751        collect_json(rows)
752    }
753
754    fn find_chunks_containing(&self, query: &str, limit: usize) -> Result<Vec<CodeChunk>> {
755        let conn = self
756            .connection
757            .lock()
758            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
759        let pattern = format!("%{}%", query);
760        let mut stmt = conn
761            .prepare("SELECT json FROM chunks WHERE text LIKE ?1 LIMIT ?2")
762            .map_err(storage_err)?;
763        let rows = stmt
764            .query_map(params![pattern, limit as i64], |row| {
765                row.get::<_, String>(0)
766            })
767            .map_err(storage_err)?;
768        collect_json(rows)
769    }
770
771    fn find_files_by_path_pattern(&self, pattern: &str) -> Result<Vec<File>> {
772        let conn = self
773            .connection
774            .lock()
775            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
776        let match_pat = format!("%{}%", pattern);
777        let mut stmt = conn
778            .prepare("SELECT json FROM files WHERE path LIKE ?1 COLLATE NOCASE")
779            .map_err(storage_err)?;
780        let rows = stmt
781            .query_map(params![match_pat], |row| row.get::<_, String>(0))
782            .map_err(storage_err)?;
783        collect_json(rows)
784    }
785
786    fn tests_for_files(&self, file_ids: &[FileId]) -> Result<Vec<TestTarget>> {
787        if file_ids.is_empty() {
788            return Ok(Vec::new());
789        }
790        let conn = self
791            .connection
792            .lock()
793            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
794
795        let placeholders = file_ids.iter().map(|_| "?").collect::<Vec<_>>().join(",");
796        let sql = format!("SELECT json FROM tests WHERE file_id IN ({})", placeholders);
797        let mut stmt = conn.prepare(&sql).map_err(storage_err)?;
798
799        let params = rusqlite::params_from_iter(file_ids.iter().map(|id| &id.0));
800        let rows = stmt
801            .query_map(params, |row| row.get::<_, String>(0))
802            .map_err(storage_err)?;
803        collect_json(rows)
804    }
805}
806
807impl HistoryStore for SqliteStore {
808    fn put_history_snapshot(&self, snapshot: &HistorySnapshot) -> Result<()> {
809        validate_history_snapshot(snapshot)?;
810        let mut conn = self
811            .connection
812            .lock()
813            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
814        let tx = conn.transaction().map_err(storage_err)?;
815
816        tx.execute("DELETE FROM git_review_events", [])
817            .map_err(storage_err)?;
818        tx.execute("DELETE FROM history_hotspots", [])
819            .map_err(storage_err)?;
820        tx.execute("DELETE FROM git_cochange_edges", [])
821            .map_err(storage_err)?;
822        tx.execute("DELETE FROM git_symbol_touches", [])
823            .map_err(storage_err)?;
824        tx.execute("DELETE FROM git_file_touches", [])
825            .map_err(storage_err)?;
826        tx.execute("DELETE FROM git_commits", [])
827            .map_err(storage_err)?;
828
829        for commit in &snapshot.commits {
830            tx.execute(
831                "INSERT INTO git_commits(id, authored_at, committed_at, author_email, json) VALUES(?1, ?2, ?3, ?4, ?5)",
832                params![
833                    &commit.id.0,
834                    commit.authored_at.to_rfc3339(),
835                    commit.committed_at.to_rfc3339(),
836                    commit.author.email.as_deref(),
837                    serde_json::to_string(commit)?,
838                ],
839            )
840            .map_err(storage_err)?;
841        }
842        for touch in &snapshot.file_touches {
843            tx.execute(
844                "INSERT INTO git_file_touches(id, commit_id, path, previous_path, touched_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6)",
845                params![
846                    &touch.id.0,
847                    &touch.commit_id.0,
848                    history_path(&touch.path)?,
849                    touch
850                        .previous_path
851                        .as_deref()
852                        .map(history_path)
853                        .transpose()?,
854                    touch.touched_at.to_rfc3339(),
855                    serde_json::to_string(touch)?,
856                ],
857            )
858            .map_err(storage_err)?;
859        }
860        for touch in &snapshot.symbol_touches {
861            tx.execute(
862                "INSERT INTO git_symbol_touches(id, commit_id, symbol_id, qualified_name, file_path, touched_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7)",
863                params![
864                    &touch.id.0,
865                    &touch.commit_id.0,
866                    touch.symbol_id.as_ref().map(|id| id.0.as_str()),
867                    &touch.qualified_name,
868                    history_path(&touch.file_path)?,
869                    touch.touched_at.to_rfc3339(),
870                    serde_json::to_string(touch)?,
871                ],
872            )
873            .map_err(storage_err)?;
874        }
875        for edge in &snapshot.cochange_edges {
876            tx.execute(
877                "INSERT INTO git_cochange_edges(id, path, cochanged_path, commit_count, recency_weight, last_changed_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7)",
878                params![
879                    &edge.id.0,
880                    history_path(&edge.path)?,
881                    history_path(&edge.cochanged_path)?,
882                    usize_to_i64(edge.commit_count, "co-change commit count")?,
883                    edge.recency_weight,
884                    edge.last_changed_at.map(|value| value.to_rfc3339()),
885                    serde_json::to_string(edge)?,
886                ],
887            )
888            .map_err(storage_err)?;
889        }
890        for evidence in &snapshot.reviewer_evidence {
891            let reviewer_identity = evidence
892                .reviewer
893                .email
894                .as_deref()
895                .unwrap_or(&evidence.reviewer.name);
896            tx.execute(
897                "INSERT INTO git_review_events(id, commit_id, path, reviewer_identity, observed_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6)",
898                params![
899                    &evidence.id.0,
900                    evidence.commit_id.as_ref().map(|id| id.0.as_str()),
901                    evidence.path.as_deref().map(history_path).transpose()?,
902                    reviewer_identity,
903                    evidence.observed_at.to_rfc3339(),
904                    serde_json::to_string(evidence)?,
905                ],
906            )
907            .map_err(storage_err)?;
908        }
909        for summary in materialize_churn_summaries(snapshot)? {
910            tx.execute(
911                "INSERT INTO history_hotspots(entity_kind, entity_key, path, symbol_id, qualified_name, hotspot_score, touch_count, generated_at, json)
912                 VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
913                params![
914                    churn_entity_kind_key(summary.entity_kind),
915                    &summary.key,
916                    summary.path.as_deref().map(history_path).transpose()?,
917                    summary.symbol_id.as_ref().map(|id| id.0.as_str()),
918                    summary.qualified_name.as_deref(),
919                    summary.stats.hotspot_score,
920                    usize_to_i64(summary.stats.touch_count, "history hotspot touch count")?,
921                    summary.generated_at.to_rfc3339(),
922                    serde_json::to_string(&summary)?,
923                ],
924            )
925            .map_err(storage_err)?;
926        }
927
928        tx.commit().map_err(storage_err)?;
929        Ok(())
930    }
931
932    fn history_for_file(&self, path: &Path, limit: usize) -> Result<HistorySummary> {
933        let normalized_path = history_path(path)?;
934        if limit == 0 {
935            return Ok(HistorySummary {
936                path: path.to_path_buf(),
937                recent_commits: Vec::new(),
938                file_touches: Vec::new(),
939                symbol_touches: Vec::new(),
940                cochange_neighbors: Vec::new(),
941                reviewer_evidence: Vec::new(),
942                truncated: false,
943                uncertainty: vec!["history query limit is zero".into()],
944            });
945        }
946
947        let conn = self
948            .connection
949            .lock()
950            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
951        let query_limit = history_query_limit(limit);
952
953        let mut commit_stmt = conn
954            .prepare(
955                "SELECT c.json FROM git_commits c
956                 WHERE EXISTS (
957                   SELECT 1 FROM git_file_touches t
958                   WHERE t.commit_id = c.id AND (t.path = ?1 OR t.previous_path = ?1)
959                 )
960                 ORDER BY c.committed_at DESC, c.id
961                 LIMIT ?2",
962            )
963            .map_err(storage_err)?;
964        let commit_rows = commit_stmt
965            .query_map(params![&normalized_path, query_limit], |row| {
966                row.get::<_, String>(0)
967            })
968            .map_err(storage_err)?;
969        let (recent_commits, commits_truncated) = collect_limited_json(commit_rows, limit)?;
970
971        let mut file_touch_stmt = conn
972            .prepare(
973                "SELECT json FROM git_file_touches
974                 WHERE path = ?1 OR previous_path = ?1
975                 ORDER BY touched_at DESC, id
976                 LIMIT ?2",
977            )
978            .map_err(storage_err)?;
979        let file_touch_rows = file_touch_stmt
980            .query_map(params![&normalized_path, query_limit], |row| {
981                row.get::<_, String>(0)
982            })
983            .map_err(storage_err)?;
984        let (file_touches, file_touches_truncated) = collect_limited_json(file_touch_rows, limit)?;
985
986        let mut symbol_touch_stmt = conn
987            .prepare(
988                "SELECT json FROM git_symbol_touches
989                 WHERE file_path = ?1
990                 ORDER BY touched_at DESC, id
991                 LIMIT ?2",
992            )
993            .map_err(storage_err)?;
994        let symbol_touch_rows = symbol_touch_stmt
995            .query_map(params![&normalized_path, query_limit], |row| {
996                row.get::<_, String>(0)
997            })
998            .map_err(storage_err)?;
999        let (symbol_touches, symbol_touches_truncated) =
1000            collect_limited_json(symbol_touch_rows, limit)?;
1001
1002        let mut cochange_stmt = conn
1003            .prepare(
1004                "SELECT json FROM git_cochange_edges
1005                 WHERE path = ?1
1006                 ORDER BY recency_weight DESC, commit_count DESC, cochanged_path
1007                 LIMIT ?2",
1008            )
1009            .map_err(storage_err)?;
1010        let cochange_rows = cochange_stmt
1011            .query_map(params![&normalized_path, query_limit], |row| {
1012                row.get::<_, String>(0)
1013            })
1014            .map_err(storage_err)?;
1015        let (cochange_neighbors, cochange_truncated) = collect_limited_json(cochange_rows, limit)?;
1016
1017        let mut reviewer_stmt = conn
1018            .prepare(
1019                "SELECT e.json FROM git_review_events e
1020                 WHERE e.path = ?1
1021                    OR (
1022                      e.path IS NULL
1023                      AND e.commit_id IN (
1024                        SELECT t.commit_id FROM git_file_touches t
1025                        WHERE t.path = ?1 OR t.previous_path = ?1
1026                      )
1027                    )
1028                 ORDER BY e.observed_at DESC, e.id
1029                 LIMIT ?2",
1030            )
1031            .map_err(storage_err)?;
1032        let reviewer_rows = reviewer_stmt
1033            .query_map(params![&normalized_path, query_limit], |row| {
1034                row.get::<_, String>(0)
1035            })
1036            .map_err(storage_err)?;
1037        let (reviewer_evidence, reviewers_truncated) = collect_limited_json(reviewer_rows, limit)?;
1038
1039        let truncated = commits_truncated
1040            || file_touches_truncated
1041            || symbol_touches_truncated
1042            || cochange_truncated
1043            || reviewers_truncated;
1044        let mut uncertainty = Vec::new();
1045        if recent_commits.is_empty()
1046            && file_touches.is_empty()
1047            && symbol_touches.is_empty()
1048            && cochange_neighbors.is_empty()
1049            && reviewer_evidence.is_empty()
1050        {
1051            uncertainty.push("no persisted history evidence is available for this path".into());
1052        } else {
1053            if symbol_touches.is_empty() {
1054                uncertainty.push("no symbol-level history is stored for this path".into());
1055            }
1056            if reviewer_evidence.is_empty() {
1057                uncertainty.push("no reviewer or owner evidence is stored for this path".into());
1058            }
1059        }
1060        if truncated {
1061            uncertainty.push(format!(
1062                "history results are truncated to {limit} records per category"
1063            ));
1064        }
1065
1066        Ok(HistorySummary {
1067            path: path.to_path_buf(),
1068            recent_commits,
1069            file_touches,
1070            symbol_touches,
1071            cochange_neighbors,
1072            reviewer_evidence,
1073            truncated,
1074            uncertainty,
1075        })
1076    }
1077
1078    fn provenance_for_path(&self, path: &Path, limit: usize) -> Result<FileProvenance> {
1079        let normalized_path = history_path(path)?;
1080        if limit == 0 {
1081            return Ok(FileProvenance {
1082                path: path.to_path_buf(),
1083                first_seen: None,
1084                last_touched: None,
1085                recent_touches: Vec::new(),
1086                confidence: Confidence::Low,
1087                truncated: false,
1088                uncertainty: vec!["provenance query limit is zero".into()],
1089            });
1090        }
1091
1092        let conn = self
1093            .connection
1094            .lock()
1095            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1096        let query_limit = history_query_limit(limit);
1097        let aliases = "
1098            WITH RECURSIVE aliases(path) AS (
1099              SELECT ?1
1100              UNION
1101              SELECT t.previous_path
1102              FROM git_file_touches t JOIN aliases a ON t.path = a.path
1103              WHERE t.previous_path IS NOT NULL
1104              UNION
1105              SELECT t.path
1106              FROM git_file_touches t JOIN aliases a ON t.previous_path = a.path
1107            )";
1108        let recent_sql = format!(
1109            "{aliases}
1110             SELECT DISTINCT t.json, c.json
1111             FROM git_file_touches t
1112             JOIN git_commits c ON c.id = t.commit_id
1113             WHERE t.path IN aliases OR t.previous_path IN aliases
1114             ORDER BY t.touched_at DESC, t.id
1115             LIMIT ?2"
1116        );
1117        let mut recent_stmt = conn.prepare(&recent_sql).map_err(storage_err)?;
1118        let rows = recent_stmt
1119            .query_map(params![&normalized_path, query_limit], |row| {
1120                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1121            })
1122            .map_err(storage_err)?;
1123        let mut recent_touches = collect_provenance_rows(rows, file_provenance_touch)?;
1124        let truncated = recent_touches.len() > limit;
1125        recent_touches.truncate(limit);
1126
1127        let first_sql = format!(
1128            "{aliases}
1129             SELECT DISTINCT t.json, c.json
1130             FROM git_file_touches t
1131             JOIN git_commits c ON c.id = t.commit_id
1132             WHERE t.path IN aliases OR t.previous_path IN aliases
1133             ORDER BY t.touched_at ASC, t.id
1134             LIMIT 1"
1135        );
1136        let first_seen = conn
1137            .query_row(&first_sql, params![&normalized_path], |row| {
1138                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1139            })
1140            .optional()
1141            .map_err(storage_err)?
1142            .map(|(touch, commit)| file_provenance_touch(&touch, &commit))
1143            .transpose()?;
1144        let last_touched = recent_touches.first().cloned();
1145        let mut uncertainty = Vec::new();
1146        if first_seen.is_none() {
1147            uncertainty.push("no persisted commit provenance is available for this path".into());
1148        } else if first_seen
1149            .as_ref()
1150            .is_some_and(|touch| touch.change_kind != open_kioku_core::GitChangeKind::Added)
1151        {
1152            uncertainty.push(
1153                "first_seen is the earliest persisted touch in the configured local history window, not a proven file-creation commit"
1154                    .into(),
1155            );
1156        }
1157        if truncated {
1158            uncertainty.push(format!(
1159                "recent provenance is truncated to {limit} touch records"
1160            ));
1161        }
1162
1163        let confidence = if uncertainty.is_empty() {
1164            Confidence::Exact
1165        } else if last_touched.is_some() {
1166            Confidence::High
1167        } else {
1168            Confidence::Low
1169        };
1170        Ok(FileProvenance {
1171            path: path.to_path_buf(),
1172            first_seen,
1173            last_touched,
1174            recent_touches,
1175            confidence,
1176            truncated,
1177            uncertainty,
1178        })
1179    }
1180
1181    fn churn_for_file(&self, path: &Path) -> Result<ChurnSummary> {
1182        let normalized_path = history_path(path)?;
1183        self.churn_by_kind_and_key(ChurnEntityKind::File, &normalized_path, || {
1184            ChurnSummary::missing(ChurnEntityKind::File, normalized_path.clone())
1185        })
1186    }
1187
1188    fn churn_for_module(&self, module: &Path) -> Result<ChurnSummary> {
1189        let normalized_module = if module == Path::new(".") || module.as_os_str().is_empty() {
1190            "__root__".to_string()
1191        } else {
1192            history_path(module)?
1193        };
1194        self.churn_by_kind_and_key(ChurnEntityKind::Module, &normalized_module, || {
1195            ChurnSummary::missing(ChurnEntityKind::Module, normalized_module.clone())
1196        })
1197    }
1198
1199    fn churn_for_symbol(&self, symbol_id: &SymbolId) -> Result<ChurnSummary> {
1200        self.churn_by_kind_and_key(ChurnEntityKind::Symbol, &symbol_id.0, || {
1201            let mut summary = ChurnSummary::missing(ChurnEntityKind::Symbol, symbol_id.0.clone());
1202            summary.symbol_id = Some(symbol_id.clone());
1203            summary.uncertainty =
1204                vec!["no persisted symbol-level churn is available for this symbol".into()];
1205            summary
1206        })
1207    }
1208
1209    fn provenance_for_symbol(
1210        &self,
1211        symbol_id: &SymbolId,
1212        limit: usize,
1213    ) -> Result<SymbolProvenance> {
1214        let conn = self
1215            .connection
1216            .lock()
1217            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1218        let symbol_json: Option<String> = conn
1219            .query_row(
1220                "SELECT json FROM symbols WHERE id = ?1",
1221                params![&symbol_id.0],
1222                |row| row.get(0),
1223            )
1224            .optional()
1225            .map_err(storage_err)?;
1226        let Some(symbol_json) = symbol_json else {
1227            return Err(OkError::SymbolNotFound(symbol_id.0.clone()));
1228        };
1229        let symbol: Symbol = serde_json::from_str(&symbol_json)?;
1230        let file_path: String = conn
1231            .query_row(
1232                "SELECT path FROM files WHERE id = ?1",
1233                params![&symbol.file_id.0],
1234                |row| row.get(0),
1235            )
1236            .map_err(storage_err)?;
1237        if limit == 0 {
1238            return Ok(SymbolProvenance {
1239                symbol_id: symbol.id,
1240                qualified_name: symbol.qualified_name,
1241                file_path: PathBuf::from(file_path),
1242                range: symbol.range,
1243                first_seen: None,
1244                last_touched: None,
1245                recent_touches: Vec::new(),
1246                confidence: Confidence::Low,
1247                truncated: false,
1248                uncertainty: vec!["provenance query limit is zero".into()],
1249            });
1250        }
1251
1252        let query_limit = history_query_limit(limit);
1253        let mut recent_stmt = conn
1254            .prepare(
1255                "SELECT t.json, c.json
1256                 FROM git_symbol_touches t
1257                 JOIN git_commits c ON c.id = t.commit_id
1258                 WHERE t.symbol_id = ?1
1259                 ORDER BY t.touched_at DESC, t.id
1260                 LIMIT ?2",
1261            )
1262            .map_err(storage_err)?;
1263        let rows = recent_stmt
1264            .query_map(params![&symbol_id.0, query_limit], |row| {
1265                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1266            })
1267            .map_err(storage_err)?;
1268        let mut recent_touches = collect_provenance_rows(rows, symbol_provenance_touch)?;
1269        let truncated = recent_touches.len() > limit;
1270        recent_touches.truncate(limit);
1271        let first_seen = conn
1272            .query_row(
1273                "SELECT t.json, c.json
1274                 FROM git_symbol_touches t
1275                 JOIN git_commits c ON c.id = t.commit_id
1276                 WHERE t.symbol_id = ?1
1277                 ORDER BY t.touched_at ASC, t.id
1278                 LIMIT 1",
1279                params![&symbol_id.0],
1280                |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)),
1281            )
1282            .optional()
1283            .map_err(storage_err)?
1284            .map(|(touch, commit)| symbol_provenance_touch(&touch, &commit))
1285            .transpose()?;
1286        let last_touched = recent_touches.first().cloned();
1287        let mut uncertainty = recent_touches
1288            .iter()
1289            .flat_map(|touch| touch.uncertainty.clone())
1290            .collect::<Vec<_>>();
1291        if let Some(first_seen) = &first_seen {
1292            uncertainty.extend(first_seen.uncertainty.clone());
1293            uncertainty.push(
1294                "first_seen is the earliest line-mapped touch in the configured local history window; it may not be the symbol-introduction commit"
1295                    .into(),
1296            );
1297        } else {
1298            uncertainty
1299                .push("no persisted line-level commit mapping is available for this symbol".into());
1300        }
1301        if symbol.range.is_none() {
1302            uncertainty.push(
1303                "the indexed symbol has no line range, so commit hunks cannot be mapped".into(),
1304            );
1305        }
1306        if truncated {
1307            uncertainty.push(format!(
1308                "recent provenance is truncated to {limit} touch records"
1309            ));
1310        }
1311        uncertainty.sort();
1312        uncertainty.dedup();
1313        let confidence = recent_touches
1314            .iter()
1315            .map(|touch| touch.confidence)
1316            .chain(first_seen.iter().map(|touch| touch.confidence))
1317            .reduce(lower_history_confidence)
1318            .unwrap_or(Confidence::Low);
1319
1320        Ok(SymbolProvenance {
1321            symbol_id: symbol.id,
1322            qualified_name: symbol.qualified_name,
1323            file_path: PathBuf::from(file_path),
1324            range: symbol.range,
1325            first_seen,
1326            last_touched,
1327            recent_touches,
1328            confidence,
1329            truncated,
1330            uncertainty,
1331        })
1332    }
1333
1334    fn similar_changes(
1335        &self,
1336        query: &SimilarChangeQuery,
1337        limit: usize,
1338    ) -> Result<SimilarChangeReport> {
1339        let normalized_query = normalize_similar_change_query(query)?;
1340        if limit == 0 {
1341            return Ok(SimilarChangeReport {
1342                query: normalized_query,
1343                generated_at: Utc::now(),
1344                hits: Vec::new(),
1345                truncated: false,
1346                uncertainty: vec!["similar-change query limit is zero".into()],
1347            });
1348        }
1349
1350        let task_tokens = normalized_query
1351            .task
1352            .as_deref()
1353            .map(tokenize_similarity_text)
1354            .unwrap_or_default();
1355        let query_paths = normalized_query
1356            .paths
1357            .iter()
1358            .map(|path| history_path(path))
1359            .collect::<Result<BTreeSet<_>>>()?;
1360        let symbol_queries = normalized_query
1361            .symbols
1362            .iter()
1363            .map(|symbol| symbol.to_lowercase())
1364            .collect::<BTreeSet<_>>();
1365
1366        if task_tokens.is_empty() && query_paths.is_empty() && symbol_queries.is_empty() {
1367            return Ok(SimilarChangeReport {
1368                query: normalized_query,
1369                generated_at: Utc::now(),
1370                hits: Vec::new(),
1371                truncated: false,
1372                uncertainty: vec![
1373                    "provide at least one task, path, or symbol similarity signal".into(),
1374                ],
1375            });
1376        }
1377
1378        let conn = self
1379            .connection
1380            .lock()
1381            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1382        let scan_limit = similar_history_scan_limit(limit);
1383
1384        let commits = load_similarity_commits(&conn, scan_limit)?;
1385        if commits.is_empty() {
1386            return Ok(SimilarChangeReport {
1387                query: normalized_query,
1388                generated_at: Utc::now(),
1389                hits: Vec::new(),
1390                truncated: false,
1391                uncertainty: vec!["no persisted commit history is available".into()],
1392            });
1393        }
1394        let file_touches = load_similarity_file_touches(&conn, scan_limit)?;
1395        let symbol_touches = load_similarity_symbol_touches(&conn, scan_limit)?;
1396        let cochange_edges = load_similarity_cochange_edges(&conn)?;
1397        let hotspots = load_similarity_file_hotspots(&conn)?;
1398
1399        let mut file_touches_by_commit: BTreeMap<String, Vec<GitFileTouch>> = BTreeMap::new();
1400        for touch in file_touches {
1401            file_touches_by_commit
1402                .entry(touch.commit_id.0.clone())
1403                .or_default()
1404                .push(touch);
1405        }
1406
1407        let mut symbol_touches_by_commit: BTreeMap<String, Vec<GitSymbolTouch>> = BTreeMap::new();
1408        for touch in symbol_touches {
1409            symbol_touches_by_commit
1410                .entry(touch.commit_id.0.clone())
1411                .or_default()
1412                .push(touch);
1413        }
1414
1415        let mut query_neighbors: BTreeMap<String, Vec<GitCochangeEdge>> = BTreeMap::new();
1416        let mut sample_edges_by_commit: BTreeMap<String, Vec<GitCochangeEdge>> = BTreeMap::new();
1417        for edge in cochange_edges {
1418            let path = history_path(&edge.path)?;
1419            let cochanged_path = history_path(&edge.cochanged_path)?;
1420            let touches_query_path =
1421                query_paths.contains(&path) || query_paths.contains(&cochanged_path);
1422            if query_paths.contains(&path) {
1423                query_neighbors
1424                    .entry(cochanged_path.clone())
1425                    .or_default()
1426                    .push(edge.clone());
1427            }
1428            if query_paths.contains(&cochanged_path) {
1429                query_neighbors
1430                    .entry(path.clone())
1431                    .or_default()
1432                    .push(edge.clone());
1433            }
1434            if touches_query_path {
1435                for commit_id in &edge.sample_commits {
1436                    sample_edges_by_commit
1437                        .entry(commit_id.0.clone())
1438                        .or_default()
1439                        .push(edge.clone());
1440                }
1441            }
1442        }
1443
1444        let query_related_paths = query_paths
1445            .iter()
1446            .cloned()
1447            .chain(query_neighbors.keys().cloned())
1448            .collect::<BTreeSet<_>>();
1449
1450        let mut hits = Vec::new();
1451        for commit in commits {
1452            let file_touches = file_touches_by_commit
1453                .get(&commit.id.0)
1454                .map(Vec::as_slice)
1455                .unwrap_or(&[]);
1456            let symbol_touches = symbol_touches_by_commit
1457                .get(&commit.id.0)
1458                .map(Vec::as_slice)
1459                .unwrap_or(&[]);
1460
1461            let candidate = score_similar_commit(
1462                &normalized_query,
1463                &task_tokens,
1464                &query_paths,
1465                &symbol_queries,
1466                &query_neighbors,
1467                &query_related_paths,
1468                &sample_edges_by_commit,
1469                &hotspots,
1470                &commit,
1471                file_touches,
1472                symbol_touches,
1473            )?;
1474            if candidate.score > 0.0 {
1475                hits.push(candidate);
1476            }
1477        }
1478
1479        hits.sort_by(|left, right| {
1480            right
1481                .score
1482                .total_cmp(&left.score)
1483                .then_with(|| {
1484                    history_confidence_rank(right.confidence)
1485                        .cmp(&history_confidence_rank(left.confidence))
1486                })
1487                .then_with(|| {
1488                    right
1489                        .change
1490                        .commit
1491                        .committed_at
1492                        .cmp(&left.change.commit.committed_at)
1493                })
1494                .then_with(|| left.change.commit.id.0.cmp(&right.change.commit.id.0))
1495        });
1496        let truncated = hits.len() > limit;
1497        hits.truncate(limit);
1498
1499        let mut uncertainty = Vec::new();
1500        if hits.is_empty() {
1501            uncertainty.push("no similar historical changes matched the query signals".into());
1502        }
1503        if truncated {
1504            uncertainty.push(format!(
1505                "similar-change results are truncated to {limit} hits"
1506            ));
1507        }
1508
1509        Ok(SimilarChangeReport {
1510            query: normalized_query,
1511            generated_at: Utc::now(),
1512            hits,
1513            truncated,
1514            uncertainty,
1515        })
1516    }
1517
1518    fn cochange_neighbors(&self, path: &Path, limit: usize) -> Result<Vec<GitCochangeEdge>> {
1519        if limit == 0 {
1520            return Ok(Vec::new());
1521        }
1522        let normalized_path = history_path(path)?;
1523        let conn = self
1524            .connection
1525            .lock()
1526            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1527        let mut stmt = conn
1528            .prepare(
1529                "SELECT json FROM git_cochange_edges
1530                 WHERE path = ?1
1531                 ORDER BY recency_weight DESC, commit_count DESC, cochanged_path
1532                 LIMIT ?2",
1533            )
1534            .map_err(storage_err)?;
1535        let rows = stmt
1536            .query_map(
1537                params![normalized_path, limit.min(i64::MAX as usize) as i64],
1538                |row| row.get::<_, String>(0),
1539            )
1540            .map_err(storage_err)?;
1541        collect_json(rows)
1542    }
1543
1544    fn recent_commits(&self, limit: usize) -> Result<Vec<GitCommitRecord>> {
1545        if limit == 0 {
1546            return Ok(Vec::new());
1547        }
1548        let conn = self
1549            .connection
1550            .lock()
1551            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1552        let mut stmt = conn
1553            .prepare("SELECT json FROM git_commits ORDER BY committed_at DESC, id LIMIT ?1")
1554            .map_err(storage_err)?;
1555        let rows = stmt
1556            .query_map(params![limit.min(i64::MAX as usize) as i64], |row| {
1557                row.get::<_, String>(0)
1558            })
1559            .map_err(storage_err)?;
1560        collect_json(rows)
1561    }
1562}
1563
1564fn collect_provenance_rows<F>(
1565    rows: rusqlite::MappedRows<'_, F>,
1566    decode: fn(&str, &str) -> Result<ProvenanceTouch>,
1567) -> Result<Vec<ProvenanceTouch>>
1568where
1569    F: FnMut(&rusqlite::Row<'_>) -> rusqlite::Result<(String, String)>,
1570{
1571    let mut touches = Vec::new();
1572    for row in rows {
1573        let (touch, commit) = row.map_err(storage_err)?;
1574        touches.push(decode(&touch, &commit)?);
1575    }
1576    Ok(touches)
1577}
1578
1579fn file_provenance_touch(touch: &str, commit: &str) -> Result<ProvenanceTouch> {
1580    let touch: GitFileTouch = serde_json::from_str(touch)?;
1581    let commit: GitCommitRecord = serde_json::from_str(commit)?;
1582    Ok(ProvenanceTouch {
1583        commit,
1584        path: touch.path,
1585        previous_path: touch.previous_path,
1586        symbol_id: None,
1587        qualified_name: None,
1588        change_kind: touch.change_kind,
1589        line_ranges: Vec::new(),
1590        confidence: Confidence::Exact,
1591        uncertainty: Vec::new(),
1592    })
1593}
1594
1595fn symbol_provenance_touch(touch: &str, commit: &str) -> Result<ProvenanceTouch> {
1596    let touch: GitSymbolTouch = serde_json::from_str(touch)?;
1597    let commit: GitCommitRecord = serde_json::from_str(commit)?;
1598    Ok(ProvenanceTouch {
1599        commit,
1600        path: touch.file_path,
1601        previous_path: None,
1602        symbol_id: touch.symbol_id,
1603        qualified_name: Some(touch.qualified_name),
1604        change_kind: touch.change_kind,
1605        line_ranges: touch.line_ranges,
1606        confidence: touch.confidence,
1607        uncertainty: touch.uncertainty,
1608    })
1609}
1610
1611fn normalize_similar_change_query(query: &SimilarChangeQuery) -> Result<SimilarChangeQuery> {
1612    let task = query
1613        .task
1614        .as_deref()
1615        .map(str::trim)
1616        .filter(|value| !value.is_empty())
1617        .map(str::to_string);
1618
1619    let mut paths = BTreeSet::new();
1620    for path in &query.paths {
1621        paths.insert(PathBuf::from(history_path(path)?));
1622    }
1623
1624    let mut symbols = BTreeSet::new();
1625    for symbol in &query.symbols {
1626        let symbol = symbol.trim();
1627        if !symbol.is_empty() {
1628            symbols.insert(symbol.to_string());
1629        }
1630    }
1631
1632    Ok(SimilarChangeQuery {
1633        task,
1634        paths: paths.into_iter().collect(),
1635        symbols: symbols.into_iter().collect(),
1636    })
1637}
1638
/// Converts a requested hit count into the number of recent commits to scan.
///
/// The scan window is 80 commits per requested hit, bounded to the range
/// 500..=5000, and returned as the `i64` SQLite expects for `LIMIT`.
fn similar_history_scan_limit(limit: usize) -> i64 {
    const MIN_SCAN: usize = 500;
    const MAX_SCAN: usize = 5_000;
    let scan_rows = limit.saturating_mul(80).clamp(MIN_SCAN, MAX_SCAN);
    scan_rows.min(i64::MAX as usize) as i64
}
1645
1646fn load_similarity_commits(conn: &Connection, scan_limit: i64) -> Result<Vec<GitCommitRecord>> {
1647    let mut stmt = conn
1648        .prepare("SELECT json FROM git_commits ORDER BY committed_at DESC, id LIMIT ?1")
1649        .map_err(storage_err)?;
1650    let rows = stmt
1651        .query_map(params![scan_limit], |row| row.get::<_, String>(0))
1652        .map_err(storage_err)?;
1653    collect_json(rows)
1654}
1655
1656fn load_similarity_file_touches(conn: &Connection, scan_limit: i64) -> Result<Vec<GitFileTouch>> {
1657    let mut stmt = conn
1658        .prepare(
1659            "SELECT t.json
1660             FROM git_file_touches t
1661             JOIN (
1662               SELECT id FROM git_commits ORDER BY committed_at DESC, id LIMIT ?1
1663             ) recent ON recent.id = t.commit_id
1664             ORDER BY t.touched_at DESC, t.id",
1665        )
1666        .map_err(storage_err)?;
1667    let rows = stmt
1668        .query_map(params![scan_limit], |row| row.get::<_, String>(0))
1669        .map_err(storage_err)?;
1670    collect_json(rows)
1671}
1672
1673fn load_similarity_symbol_touches(
1674    conn: &Connection,
1675    scan_limit: i64,
1676) -> Result<Vec<GitSymbolTouch>> {
1677    let mut stmt = conn
1678        .prepare(
1679            "SELECT t.json
1680             FROM git_symbol_touches t
1681             JOIN (
1682               SELECT id FROM git_commits ORDER BY committed_at DESC, id LIMIT ?1
1683             ) recent ON recent.id = t.commit_id
1684             ORDER BY t.touched_at DESC, t.id",
1685        )
1686        .map_err(storage_err)?;
1687    let rows = stmt
1688        .query_map(params![scan_limit], |row| row.get::<_, String>(0))
1689        .map_err(storage_err)?;
1690    collect_json(rows)
1691}
1692
1693fn load_similarity_cochange_edges(conn: &Connection) -> Result<Vec<GitCochangeEdge>> {
1694    let mut stmt = conn
1695        .prepare(
1696            "SELECT json FROM git_cochange_edges
1697             ORDER BY recency_weight DESC, commit_count DESC, path, cochanged_path
1698             LIMIT 5000",
1699        )
1700        .map_err(storage_err)?;
1701    let rows = stmt
1702        .query_map([], |row| row.get::<_, String>(0))
1703        .map_err(storage_err)?;
1704    collect_json(rows)
1705}
1706
1707fn load_similarity_file_hotspots(conn: &Connection) -> Result<BTreeMap<String, ChurnSummary>> {
1708    let mut stmt = conn
1709        .prepare(
1710            "SELECT entity_key, json FROM history_hotspots
1711             WHERE entity_kind = 'file'
1712             ORDER BY hotspot_score DESC, touch_count DESC, entity_key
1713             LIMIT 5000",
1714        )
1715        .map_err(storage_err)?;
1716    let rows = stmt
1717        .query_map([], |row| {
1718            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1719        })
1720        .map_err(storage_err)?;
1721    let mut out = BTreeMap::new();
1722    for row in rows {
1723        let (key, json) = row.map_err(storage_err)?;
1724        out.insert(key, serde_json::from_str(&json)?);
1725    }
1726    Ok(out)
1727}
1728
1729#[allow(clippy::too_many_arguments)]
1730fn score_similar_commit(
1731    query: &SimilarChangeQuery,
1732    task_tokens: &BTreeSet<String>,
1733    query_paths: &BTreeSet<String>,
1734    symbol_queries: &BTreeSet<String>,
1735    query_neighbors: &BTreeMap<String, Vec<GitCochangeEdge>>,
1736    query_related_paths: &BTreeSet<String>,
1737    sample_edges_by_commit: &BTreeMap<String, Vec<GitCochangeEdge>>,
1738    hotspots: &BTreeMap<String, ChurnSummary>,
1739    commit: &GitCommitRecord,
1740    file_touches: &[GitFileTouch],
1741    symbol_touches: &[GitSymbolTouch],
1742) -> Result<SimilarChangeHit> {
1743    let mut score = 0.0_f32;
1744    let mut evidence = Vec::new();
1745    let mut source_types = BTreeSet::new();
1746    let mut touched_paths = BTreeSet::new();
1747    let mut touched_symbols = BTreeSet::new();
1748    let mut cochange_paths = BTreeSet::new();
1749    let mut max_hotspot_score = 0.0_f32;
1750
1751    for touch in file_touches {
1752        touched_paths.insert(touch.path.clone());
1753        if let Some(previous_path) = &touch.previous_path {
1754            touched_paths.insert(previous_path.clone());
1755        }
1756    }
1757    for touch in symbol_touches {
1758        touched_symbols.insert(touch.qualified_name.clone());
1759    }
1760
1761    if let Some(task) = &query.task {
1762        let commit_tokens =
1763            tokenize_similarity_text(&format!("{} {}", commit.summary, commit.message));
1764        let overlaps = task_tokens
1765            .intersection(&commit_tokens)
1766            .cloned()
1767            .collect::<Vec<_>>();
1768        if !overlaps.is_empty() {
1769            let contribution = (overlaps.len() as f32 * 0.08).min(0.32);
1770            let task_score = round_similarity_score(contribution * 0.75);
1771            let metadata_score = round_similarity_score(contribution * 0.25);
1772            evidence.push(SimilarityEvidence {
1773                source_type: SimilarityEvidenceSource::TaskText,
1774                score: task_score,
1775                message: format!(
1776                    "task text matched commit summary/message token(s): {}",
1777                    overlaps.join(", ")
1778                ),
1779                query: Some(task.clone()),
1780                path: None,
1781                symbol: None,
1782                commit_id: Some(commit.id.clone()),
1783            });
1784            evidence.push(SimilarityEvidence {
1785                source_type: SimilarityEvidenceSource::CommitMetadata,
1786                score: metadata_score,
1787                message: "commit summary and message metadata overlap the requested task".into(),
1788                query: Some(task.clone()),
1789                path: None,
1790                symbol: None,
1791                commit_id: Some(commit.id.clone()),
1792            });
1793            score += contribution;
1794            source_types.insert(SimilarityEvidenceSource::TaskText);
1795            source_types.insert(SimilarityEvidenceSource::CommitMetadata);
1796        }
1797    }
1798
1799    let mut path_score = 0.0_f32;
1800    let mut matched_paths = BTreeSet::new();
1801    for touch in file_touches {
1802        let path = history_path(&touch.path)?;
1803        if query_paths.contains(&path) && matched_paths.insert(path.clone()) {
1804            path_score += 0.42;
1805            evidence.push(SimilarityEvidence {
1806                source_type: SimilarityEvidenceSource::Path,
1807                score: 0.42,
1808                message: "commit touched an exact query path".into(),
1809                query: Some(path.clone()),
1810                path: Some(PathBuf::from(path)),
1811                symbol: None,
1812                commit_id: Some(commit.id.clone()),
1813            });
1814        }
1815        if let Some(previous_path) = &touch.previous_path {
1816            let previous_path = history_path(previous_path)?;
1817            if query_paths.contains(&previous_path) && matched_paths.insert(previous_path.clone()) {
1818                path_score += 0.32;
1819                evidence.push(SimilarityEvidence {
1820                    source_type: SimilarityEvidenceSource::Path,
1821                    score: 0.32,
1822                    message: "commit touched a previous name for a query path".into(),
1823                    query: Some(previous_path.clone()),
1824                    path: Some(PathBuf::from(previous_path)),
1825                    symbol: None,
1826                    commit_id: Some(commit.id.clone()),
1827                });
1828            }
1829        }
1830    }
1831    if path_score > 0.0 {
1832        score += path_score.min(0.50);
1833        source_types.insert(SimilarityEvidenceSource::Path);
1834    }
1835
1836    let mut symbol_score = 0.0_f32;
1837    let mut matched_symbols = BTreeSet::new();
1838    for touch in symbol_touches {
1839        for query_symbol in symbol_queries {
1840            let Some((matched_symbol, contribution)) = similarity_symbol_match(query_symbol, touch)
1841            else {
1842                continue;
1843            };
1844            if matched_symbols.insert((query_symbol.clone(), matched_symbol.clone())) {
1845                symbol_score += contribution;
1846                evidence.push(SimilarityEvidence {
1847                    source_type: SimilarityEvidenceSource::Symbol,
1848                    score: contribution,
1849                    message: "commit touched a symbol matching the query".into(),
1850                    query: Some(query_symbol.clone()),
1851                    path: Some(touch.file_path.clone()),
1852                    symbol: Some(matched_symbol),
1853                    commit_id: Some(commit.id.clone()),
1854                });
1855            }
1856        }
1857    }
1858    if symbol_score > 0.0 {
1859        score += symbol_score.min(0.45);
1860        source_types.insert(SimilarityEvidenceSource::Symbol);
1861    }
1862
1863    let mut cochange_score = 0.0_f32;
1864    let mut matched_cochanges = BTreeSet::new();
1865    for touch in file_touches {
1866        let path = history_path(&touch.path)?;
1867        if let Some(edges) = query_neighbors.get(&path) {
1868            for edge in edges {
1869                let edge_path = history_path(&edge.path)?;
1870                let edge_cochanged = history_path(&edge.cochanged_path)?;
1871                let neighbor = if query_paths.contains(&edge_path) {
1872                    edge_cochanged
1873                } else {
1874                    edge_path
1875                };
1876                if matched_cochanges.insert(neighbor.clone()) {
1877                    let contribution = (0.16 + edge.recency_weight.min(2.5) * 0.03).min(0.26);
1878                    cochange_score += contribution;
1879                    cochange_paths.insert(PathBuf::from(neighbor.clone()));
1880                    evidence.push(SimilarityEvidence {
1881                        source_type: SimilarityEvidenceSource::Cochange,
1882                        score: round_similarity_score(contribution),
1883                        message: "commit touched a co-change neighbor of a query path".into(),
1884                        query: query_paths.iter().next().cloned(),
1885                        path: Some(PathBuf::from(neighbor)),
1886                        symbol: None,
1887                        commit_id: Some(commit.id.clone()),
1888                    });
1889                }
1890            }
1891        }
1892    }
1893    if let Some(edges) = sample_edges_by_commit.get(&commit.id.0) {
1894        for edge in edges {
1895            let sample_key = format!(
1896                "sample:{}:{}",
1897                edge.path.display(),
1898                edge.cochanged_path.display()
1899            );
1900            if matched_cochanges.insert(sample_key) {
1901                let contribution = 0.10_f32;
1902                cochange_score += contribution;
1903                cochange_paths.insert(edge.path.clone());
1904                cochange_paths.insert(edge.cochanged_path.clone());
1905                evidence.push(SimilarityEvidence {
1906                    source_type: SimilarityEvidenceSource::Cochange,
1907                    score: contribution,
1908                    message: "commit is a persisted sample for a query path co-change edge".into(),
1909                    query: query_paths.iter().next().cloned(),
1910                    path: Some(edge.cochanged_path.clone()),
1911                    symbol: None,
1912                    commit_id: Some(commit.id.clone()),
1913                });
1914            }
1915        }
1916    }
1917    if cochange_score > 0.0 {
1918        score += cochange_score.min(0.35);
1919        source_types.insert(SimilarityEvidenceSource::Cochange);
1920    }
1921
1922    let mut churn_score = 0.0_f32;
1923    let mut matched_hotspots = BTreeSet::new();
1924    for touch in file_touches {
1925        let path = history_path(&touch.path)?;
1926        if !query_related_paths.contains(&path) {
1927            continue;
1928        }
1929        let Some(summary) = hotspots.get(&path) else {
1930            continue;
1931        };
1932        if summary.stats.hotspot_score <= 0.0 || !matched_hotspots.insert(path.clone()) {
1933            continue;
1934        }
1935        let contribution = (summary.stats.hotspot_score.ln_1p() * 0.08).min(0.14);
1936        churn_score += contribution;
1937        max_hotspot_score = max_hotspot_score.max(summary.stats.hotspot_score);
1938        evidence.push(SimilarityEvidence {
1939            source_type: SimilarityEvidenceSource::Churn,
1940            score: round_similarity_score(contribution),
1941            message: "commit touched a query-related historical churn hotspot".into(),
1942            query: Some(path.clone()),
1943            path: Some(PathBuf::from(path)),
1944            symbol: None,
1945            commit_id: Some(commit.id.clone()),
1946        });
1947    }
1948    if churn_score > 0.0 {
1949        score += churn_score.min(0.18);
1950        source_types.insert(SimilarityEvidenceSource::Churn);
1951    }
1952
1953    let rounded_score = round_similarity_score(score.min(1.0));
1954    let confidence = similar_change_confidence(rounded_score, &source_types);
1955    let mut uncertainty = Vec::new();
1956    if source_types == BTreeSet::from([SimilarityEvidenceSource::Path]) {
1957        uncertainty.push("similarity is based only on exact path overlap".into());
1958    }
1959    if confidence == Confidence::Low {
1960        uncertainty
1961            .push("low-confidence historical similarity; inspect the commit before reuse".into());
1962    }
1963    if query.task.is_some() && !source_types.contains(&SimilarityEvidenceSource::TaskText) {
1964        uncertainty.push("task text did not match this commit's summary or message".into());
1965    }
1966    uncertainty.sort();
1967    uncertainty.dedup();
1968
1969    Ok(SimilarChangeHit {
1970        change: HistoricalChangeSummary {
1971            commit: commit.clone(),
1972            touched_paths: touched_paths.into_iter().collect(),
1973            touched_symbols: touched_symbols.into_iter().collect(),
1974            cochange_paths: cochange_paths.into_iter().collect(),
1975            churn_hotspot_score: round_similarity_score(max_hotspot_score),
1976        },
1977        score: rounded_score,
1978        confidence,
1979        evidence,
1980        uncertainty,
1981    })
1982}
1983
/// Splits free text into a set of lowercase comparison tokens.
///
/// A token is a maximal run of ASCII letters/digits after lowercasing; every
/// other character acts as a separator. Tokens shorter than three bytes and
/// a small list of stop words are discarded.
fn tokenize_similarity_text(text: &str) -> BTreeSet<String> {
    const STOP_WORDS: &[&str] = &[
        "and", "are", "but", "for", "from", "into", "the", "this", "that", "with", "your", "you",
        "fix", "add", "use", "using",
    ];
    // Lowercase per character so the token boundaries are computed on the
    // already-lowercased stream.
    let lowered: String = text.chars().flat_map(char::to_lowercase).collect();
    lowered
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|token| token.len() >= 3 && !STOP_WORDS.contains(token))
        .map(str::to_string)
        .collect()
}
2008
2009fn similarity_symbol_match(query_symbol: &str, touch: &GitSymbolTouch) -> Option<(String, f32)> {
2010    let qualified = touch.qualified_name.to_lowercase();
2011    let symbol_id = touch
2012        .symbol_id
2013        .as_ref()
2014        .map(|id| id.0.to_lowercase())
2015        .unwrap_or_default();
2016    let namespace_tail = qualified.rsplit("::").next().unwrap_or(&qualified);
2017    let short_name = namespace_tail.rsplit('.').next().unwrap_or(namespace_tail);
2018    if query_symbol == qualified || query_symbol == symbol_id || query_symbol == short_name {
2019        Some((touch.qualified_name.clone(), 0.35))
2020    } else if qualified.contains(query_symbol) {
2021        Some((touch.qualified_name.clone(), 0.18))
2022    } else {
2023        None
2024    }
2025}
2026
2027fn similar_change_confidence(
2028    score: f32,
2029    source_types: &BTreeSet<SimilarityEvidenceSource>,
2030) -> Confidence {
2031    let source_count = source_types.len();
2032    if (source_count >= 4 && score >= 0.75) || (source_count >= 3 && score >= 0.55) {
2033        Confidence::High
2034    } else if source_count >= 2 && score >= 0.35 {
2035        Confidence::Medium
2036    } else {
2037        Confidence::Low
2038    }
2039}
2040
/// Rounds a similarity score to three decimal places.
fn round_similarity_score(score: f32) -> f32 {
    let thousandths = score * 1000.0;
    thousandths.round() / 1000.0
}
2044
2045fn lower_history_confidence(left: Confidence, right: Confidence) -> Confidence {
2046    if history_confidence_rank(left) <= history_confidence_rank(right) {
2047        left
2048    } else {
2049        right
2050    }
2051}
2052
2053fn history_confidence_rank(confidence: Confidence) -> u8 {
2054    match confidence {
2055        Confidence::Low => 0,
2056        Confidence::Medium => 1,
2057        Confidence::High => 2,
2058        Confidence::Exact => 3,
2059    }
2060}
/// Page size substituted by `clamp_limit` when a caller passes a limit of 0.
const DEFAULT_GRAPH_QUERY_LIMIT: usize = 100;
/// Largest page size `clamp_limit` will allow for a graph query.
const MAX_GRAPH_QUERY_LIMIT: usize = 1_000;
2063
2064struct IndexRows<'a> {
2065    files: &'a [File],
2066    symbols: &'a [Symbol],
2067    chunks: &'a [CodeChunk],
2068    tests: &'a [TestTarget],
2069    imports: &'a [Import],
2070    occurrences: &'a [SymbolOccurrence],
2071    analysis_facts: &'a [AnalysisFact],
2072}
2073
2074fn insert_index_rows(tx: &Transaction<'_>, rows: IndexRows<'_>) -> Result<()> {
2075    for file in rows.files {
2076        tx.execute(
2077            "INSERT INTO files(id, path, json) VALUES(?1, ?2, ?3)",
2078            params![
2079                &file.id.0,
2080                file.path.to_string_lossy().as_ref(),
2081                serde_json::to_string(file)?
2082            ],
2083        )
2084        .map_err(storage_err)?;
2085    }
2086    for symbol in rows.symbols {
2087        tx.execute(
2088            "INSERT INTO symbols(id, name, qualified_name, file_id, json) VALUES(?1, ?2, ?3, ?4, ?5)",
2089            params![
2090                &symbol.id.0,
2091                &symbol.name,
2092                &symbol.qualified_name,
2093                &symbol.file_id.0,
2094                serde_json::to_string(symbol)?
2095            ],
2096        )
2097        .map_err(storage_err)?;
2098    }
2099    for chunk in rows.chunks {
2100        tx.execute(
2101            "INSERT INTO chunks(id, file_id, start_line, end_line, text, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6)",
2102            params![
2103                &chunk.id,
2104                &chunk.file_id.0,
2105                chunk.range.start,
2106                chunk.range.end,
2107                &chunk.text,
2108                serde_json::to_string(chunk)?
2109            ],
2110        )
2111        .map_err(storage_err)?;
2112    }
2113    for test in rows.tests {
2114        tx.execute(
2115            "INSERT INTO tests(id, file_id, json) VALUES(?1, ?2, ?3) ON CONFLICT(id) DO UPDATE SET json = excluded.json",
2116            params![&test.id, &test.file_id.0, serde_json::to_string(test)?],
2117        )
2118        .map_err(storage_err)?;
2119    }
2120    for import in rows.imports {
2121        tx.execute(
2122            "INSERT INTO imports(id, file_id, imported, json) VALUES(?1, ?2, ?3, ?4)",
2123            params![
2124                occurrence_id(
2125                    &import.file_id.0,
2126                    &import.imported,
2127                    import.range.as_ref().map(|range| range.start),
2128                    true
2129                ),
2130                &import.file_id.0,
2131                &import.imported,
2132                serde_json::to_string(import)?
2133            ],
2134        )
2135        .map_err(storage_err)?;
2136    }
2137    for occurrence in rows.occurrences {
2138        tx.execute(
2139            "INSERT INTO occurrences(id, symbol_id, file_id, is_definition, json) VALUES(?1, ?2, ?3, ?4, ?5)",
2140            params![
2141                occurrence_id(
2142                    &occurrence.file_id.0,
2143                    &occurrence.symbol_id.0,
2144                    occurrence.range.as_ref().map(|range| range.start),
2145                    occurrence.is_definition,
2146                ),
2147                &occurrence.symbol_id.0,
2148                &occurrence.file_id.0,
2149                if occurrence.is_definition { 1 } else { 0 },
2150                serde_json::to_string(occurrence)?
2151            ],
2152        )
2153        .map_err(storage_err)?;
2154    }
2155    for fact in rows.analysis_facts {
2156        tx.execute(
2157            "INSERT INTO analysis_facts(id, file_id, source_type, target, json) VALUES(?1, ?2, ?3, ?4, ?5)",
2158            params![
2159                &fact.id,
2160                &fact.file_id.0,
2161                source_type_name(&fact.source_type),
2162                &fact.target,
2163                serde_json::to_string(fact)?
2164            ],
2165        )
2166        .map_err(storage_err)?;
2167    }
2168    Ok(())
2169}
2170
2171fn insert_graph_rows(tx: &Transaction<'_>, nodes: &[GraphNode], edges: &[GraphEdge]) -> Result<()> {
2172    for node in nodes {
2173        let evidence_available = node.file_id.is_some() || node.symbol_id.is_some();
2174        tx.execute(
2175            "INSERT INTO graph_nodes(id, label, node_type, file_id, symbol_id, evidence_available, freshness, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
2176            params![
2177                &node.id.0,
2178                &node.label,
2179                format!("{:?}", node.node_type),
2180                node.file_id.as_ref().map(|f| &f.0),
2181                node.symbol_id.as_ref().map(|s| &s.0),
2182                evidence_available,
2183                0_i64,
2184                serde_json::to_string(node)?
2185            ],
2186        )
2187        .map_err(storage_err)?;
2188    }
2189    for edge in edges {
2190        let freshness = edge.evidence.indexed_at.timestamp();
2191        tx.execute(
2192            "INSERT INTO graph_edges(id, from_id, to_id, edge_type, confidence, source_type, source_file, evidence_available, freshness, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
2193            params![
2194                &edge.id.0,
2195                &edge.from.0,
2196                &edge.to.0,
2197                format!("{:?}", edge.edge_type),
2198                format!("{:?}", edge.evidence.confidence),
2199                format!("{:?}", edge.evidence.source_type),
2200                &edge.evidence.source,
2201                true,
2202                freshness,
2203                serde_json::to_string(edge)?
2204            ],
2205        )
2206        .map_err(storage_err)?;
2207    }
2208    Ok(())
2209}
2210
2211fn clamp_limit(limit: usize) -> usize {
2212    if limit == 0 {
2213        DEFAULT_GRAPH_QUERY_LIMIT
2214    } else {
2215        limit.min(MAX_GRAPH_QUERY_LIMIT)
2216    }
2217}
2218
2219impl GraphStore for SqliteStore {
2220    fn replace_graph(&self, nodes: &[GraphNode], edges: &[GraphEdge]) -> Result<()> {
2221        let mut conn = self
2222            .connection
2223            .lock()
2224            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2225        let tx = conn.transaction().map_err(storage_err)?;
2226        tx.execute("DELETE FROM graph_edges", [])
2227            .map_err(storage_err)?;
2228        tx.execute("DELETE FROM graph_nodes", [])
2229            .map_err(storage_err)?;
2230        insert_graph_rows(&tx, nodes, edges)?;
2231        tx.commit().map_err(storage_err)?;
2232        Ok(())
2233    }
2234
2235    fn node_type_stats(
2236        &self,
2237    ) -> Result<std::collections::HashMap<String, open_kioku_storage::TypeStats>> {
2238        let conn = self
2239            .connection
2240            .lock()
2241            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2242        let mut stmt = conn
2243            .prepare("SELECT node_type, COUNT(*), MAX(evidence_available), MAX(freshness) FROM graph_nodes GROUP BY node_type")
2244            .map_err(storage_err)?;
2245        let mut rows = stmt.query([]).map_err(storage_err)?;
2246        let mut map = std::collections::HashMap::new();
2247        while let Some(row) = rows.next().map_err(storage_err)? {
2248            let t: String = row.get(0).map_err(storage_err)?;
2249            let c: i64 = row.get(1).map_err(storage_err)?;
2250            let ev: bool = row.get(2).unwrap_or(false);
2251            let fr: Option<i64> = row.get(3).unwrap_or(None);
2252            map.insert(
2253                t,
2254                open_kioku_storage::TypeStats {
2255                    count: c as usize,
2256                    evidence_available: ev,
2257                    freshness: fr.map(|v| v as u64),
2258                },
2259            );
2260        }
2261        Ok(map)
2262    }
2263
2264    fn edge_type_stats(
2265        &self,
2266    ) -> Result<std::collections::HashMap<String, open_kioku_storage::TypeStats>> {
2267        let conn = self
2268            .connection
2269            .lock()
2270            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2271        let mut stmt = conn
2272            .prepare("SELECT edge_type, COUNT(*), MAX(evidence_available), MAX(freshness) FROM graph_edges GROUP BY edge_type")
2273            .map_err(storage_err)?;
2274        let mut rows = stmt.query([]).map_err(storage_err)?;
2275        let mut map = std::collections::HashMap::new();
2276        while let Some(row) = rows.next().map_err(storage_err)? {
2277            let t: String = row.get(0).map_err(storage_err)?;
2278            let c: i64 = row.get(1).map_err(storage_err)?;
2279            let ev: bool = row.get(2).unwrap_or(false);
2280            let fr: Option<i64> = row.get(3).unwrap_or(None);
2281            map.insert(
2282                t,
2283                open_kioku_storage::TypeStats {
2284                    count: c as usize,
2285                    evidence_available: ev,
2286                    freshness: fr.map(|v| v as u64),
2287                },
2288            );
2289        }
2290        Ok(map)
2291    }
2292
2293    fn node_by_id(&self, id: &str) -> Result<Option<GraphNode>> {
2294        let conn = self
2295            .connection
2296            .lock()
2297            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2298        graph_node_by_id(&conn, id)
2299    }
2300
2301    fn neighbors(&self, node: &str, limit: usize) -> Result<(Vec<GraphNode>, Vec<GraphEdge>)> {
2302        let conn = self
2303            .connection
2304            .lock()
2305            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2306        let mut stmt = conn
2307            .prepare("SELECT json FROM graph_edges WHERE from_id = ?1 OR to_id = ?1 LIMIT ?2")
2308            .map_err(storage_err)?;
2309        let rows = stmt
2310            .query_map(params![node, limit as i64], |row| row.get::<_, String>(0))
2311            .map_err(storage_err)?;
2312        let edges: Vec<GraphEdge> = collect_json(rows)?;
2313        let mut ids = edges
2314            .iter()
2315            .flat_map(|edge| [edge.from.0.clone(), edge.to.0.clone()])
2316            .collect::<Vec<_>>();
2317        ids.sort();
2318        ids.dedup();
2319        let mut nodes = Vec::new();
2320        for id in ids {
2321            if let Some(node) = graph_node_by_id(&conn, &id)? {
2322                nodes.push(node);
2323            }
2324        }
2325        Ok((nodes, edges))
2326    }
2327
2328    fn shortest_path(&self, from: &str, to: &str, max_depth: usize) -> Result<Vec<GraphEdge>> {
2329        use std::collections::{HashSet, VecDeque};
2330
2331        let conn = self
2332            .connection
2333            .lock()
2334            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2335
2336        // Prepare the statement once outside the BFS loop to avoid
2337        // O(N) statement recompilation on large graphs.
2338        let mut edge_stmt = conn
2339            .prepare("SELECT json FROM graph_edges WHERE from_id = ?1")
2340            .map_err(storage_err)?;
2341
2342        let mut queue = VecDeque::from([(from.to_string(), Vec::<GraphEdge>::new())]);
2343        let mut seen = HashSet::new();
2344        while let Some((node, path)) = queue.pop_front() {
2345            if node == to {
2346                return Ok(path);
2347            }
2348            if path.len() >= max_depth || !seen.insert(node.clone()) {
2349                continue;
2350            }
2351            let rows = edge_stmt
2352                .query_map(params![&node], |row| row.get::<_, String>(0))
2353                .map_err(storage_err)?;
2354            let edges: Vec<GraphEdge> = collect_json(rows)?;
2355            for edge in edges {
2356                let mut next_path = path.clone();
2357                next_path.push(edge.clone());
2358                queue.push_back((edge.to.0.clone(), next_path));
2359            }
2360        }
2361        Ok(Vec::new())
2362    }
2363    fn nodes_by_type(
2364        &self,
2365        node_type: GraphNodeType,
2366        limit: usize,
2367        offset: usize,
2368    ) -> Result<Vec<GraphNode>> {
2369        let conn = self
2370            .connection
2371            .lock()
2372            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2373        let limit = clamp_limit(limit) as i64;
2374        let offset = offset as i64;
2375        let type_str = format!("{:?}", node_type);
2376        let mut stmt = conn
2377            .prepare(
2378                "SELECT json FROM graph_nodes WHERE node_type = ?1 ORDER BY id LIMIT ?2 OFFSET ?3",
2379            )
2380            .map_err(storage_err)?;
2381        let rows = stmt
2382            .query_map(params![type_str, limit, offset], |row| {
2383                row.get::<_, String>(0)
2384            })
2385            .map_err(storage_err)?;
2386        collect_json(rows)
2387    }
2388
2389    fn all_graph_nodes(&self) -> Result<Vec<GraphNode>> {
2390        let conn = self
2391            .connection
2392            .lock()
2393            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2394        let mut stmt = conn
2395            .prepare("SELECT json FROM graph_nodes ORDER BY id")
2396            .map_err(storage_err)?;
2397        let rows = stmt
2398            .query_map([], |row| row.get::<_, String>(0))
2399            .map_err(storage_err)?;
2400        collect_json(rows)
2401    }
2402
2403    fn edges_by_type(
2404        &self,
2405        edge_type: GraphEdgeType,
2406        limit: usize,
2407        offset: usize,
2408    ) -> Result<Vec<GraphEdge>> {
2409        let conn = self
2410            .connection
2411            .lock()
2412            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2413        let limit = clamp_limit(limit) as i64;
2414        let offset = offset as i64;
2415        let type_str = format!("{:?}", edge_type);
2416        let mut stmt = conn
2417            .prepare(
2418                "SELECT json FROM graph_edges WHERE edge_type = ?1 ORDER BY id LIMIT ?2 OFFSET ?3",
2419            )
2420            .map_err(storage_err)?;
2421        let rows = stmt
2422            .query_map(params![type_str, limit, offset], |row| {
2423                row.get::<_, String>(0)
2424            })
2425            .map_err(storage_err)?;
2426        collect_json(rows)
2427    }
2428
2429    fn graph_counts(&self) -> Result<GraphCounts> {
2430        let conn = self
2431            .connection
2432            .lock()
2433            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2434        let nodes: usize = conn
2435            .query_row("SELECT COUNT(*) FROM graph_nodes", [], |row| row.get(0))
2436            .map_err(storage_err)?;
2437        let edges: usize = conn
2438            .query_row("SELECT COUNT(*) FROM graph_edges", [], |row| row.get(0))
2439            .map_err(storage_err)?;
2440        Ok(GraphCounts { nodes, edges })
2441    }
2442
2443    fn graph_schema_counts(&self) -> Result<GraphSchemaCounts> {
2444        let conn = self
2445            .connection
2446            .lock()
2447            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2448
2449        let mut node_types = std::collections::BTreeMap::new();
2450        let mut stmt = conn
2451            .prepare("SELECT node_type, COUNT(*) FROM graph_nodes GROUP BY node_type")
2452            .map_err(storage_err)?;
2453        let mut rows = stmt.query([]).map_err(storage_err)?;
2454        while let Some(row) = rows.next().map_err(storage_err)? {
2455            let ntype: String = row.get(0).map_err(storage_err)?;
2456            let count: usize = row.get(1).map_err(storage_err)?;
2457            if !ntype.is_empty() {
2458                node_types.insert(ntype, count);
2459            }
2460        }
2461
2462        let mut edge_types = std::collections::BTreeMap::new();
2463        let mut stmt = conn
2464            .prepare("SELECT edge_type, COUNT(*) FROM graph_edges GROUP BY edge_type")
2465            .map_err(storage_err)?;
2466        let mut rows = stmt.query([]).map_err(storage_err)?;
2467        while let Some(row) = rows.next().map_err(storage_err)? {
2468            let etype: String = row.get(0).map_err(storage_err)?;
2469            let count: usize = row.get(1).map_err(storage_err)?;
2470            if !etype.is_empty() {
2471                edge_types.insert(etype, count);
2472            }
2473        }
2474
2475        Ok(GraphSchemaCounts {
2476            node_types,
2477            edge_types,
2478        })
2479    }
2480
2481    fn graph_edges_between(&self, from: &str, to: &str, limit: usize) -> Result<Vec<GraphEdge>> {
2482        let conn = self
2483            .connection
2484            .lock()
2485            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2486        let limit = clamp_limit(limit) as i64;
2487        let mut stmt = conn
2488            .prepare("SELECT json FROM graph_edges WHERE from_id = ?1 AND to_id = ?2 ORDER BY id LIMIT ?3")
2489            .map_err(storage_err)?;
2490        let rows = stmt
2491            .query_map(params![from, to, limit], |row| row.get::<_, String>(0))
2492            .map_err(storage_err)?;
2493        collect_json(rows)
2494    }
2495}
2496
2497fn is_duplicate_column(err: &rusqlite::Error) -> bool {
2498    if let rusqlite::Error::SqliteFailure(_, Some(msg)) = err {
2499        msg.contains("duplicate column name")
2500    } else {
2501        false
2502    }
2503}
2504
2505fn add_column_if_not_exists(conn: &mut Connection, stmt: &str) -> Result<()> {
2506    match conn.execute(stmt, []) {
2507        Ok(_) => Ok(()),
2508        Err(err) if is_duplicate_column(&err) => Ok(()),
2509        Err(err) => Err(storage_err(err)),
2510    }
2511}
2512
2513fn migrate_graph_schema(conn: &mut Connection) -> Result<()> {
2514    // Add columns to graph_nodes
2515    add_column_if_not_exists(
2516        conn,
2517        "ALTER TABLE graph_nodes ADD COLUMN node_type TEXT DEFAULT ''",
2518    )?;
2519    add_column_if_not_exists(
2520        conn,
2521        "ALTER TABLE graph_nodes ADD COLUMN file_id TEXT DEFAULT ''",
2522    )?;
2523    add_column_if_not_exists(
2524        conn,
2525        "ALTER TABLE graph_nodes ADD COLUMN symbol_id TEXT DEFAULT ''",
2526    )?;
2527    add_column_if_not_exists(
2528        conn,
2529        "ALTER TABLE graph_nodes ADD COLUMN evidence_available BOOLEAN DEFAULT 0",
2530    )?;
2531    add_column_if_not_exists(
2532        conn,
2533        "ALTER TABLE graph_nodes ADD COLUMN freshness INTEGER DEFAULT 0",
2534    )?;
2535
2536    // Add columns to graph_edges
2537    add_column_if_not_exists(
2538        conn,
2539        "ALTER TABLE graph_edges ADD COLUMN confidence TEXT DEFAULT ''",
2540    )?;
2541    add_column_if_not_exists(
2542        conn,
2543        "ALTER TABLE graph_edges ADD COLUMN source_type TEXT DEFAULT ''",
2544    )?;
2545    add_column_if_not_exists(
2546        conn,
2547        "ALTER TABLE graph_edges ADD COLUMN source_file TEXT DEFAULT ''",
2548    )?;
2549    add_column_if_not_exists(
2550        conn,
2551        "ALTER TABLE graph_edges ADD COLUMN evidence_available BOOLEAN DEFAULT 0",
2552    )?;
2553    add_column_if_not_exists(
2554        conn,
2555        "ALTER TABLE graph_edges ADD COLUMN freshness INTEGER DEFAULT 0",
2556    )?;
2557
2558    backfill_graph_query_columns(conn)?;
2559
2560    // Add indexes (these are idempotent via IF NOT EXISTS)
2561    conn.execute(
2562        "CREATE INDEX IF NOT EXISTS idx_graph_nodes_type ON graph_nodes(node_type)",
2563        [],
2564    )
2565    .map_err(storage_err)?;
2566    conn.execute(
2567        "CREATE INDEX IF NOT EXISTS idx_graph_nodes_file ON graph_nodes(file_id)",
2568        [],
2569    )
2570    .map_err(storage_err)?;
2571    conn.execute(
2572        "CREATE INDEX IF NOT EXISTS idx_graph_nodes_symbol ON graph_nodes(symbol_id)",
2573        [],
2574    )
2575    .map_err(storage_err)?;
2576    conn.execute(
2577        "CREATE INDEX IF NOT EXISTS idx_graph_edges_type ON graph_edges(edge_type)",
2578        [],
2579    )
2580    .map_err(storage_err)?;
2581    conn.execute(
2582        "CREATE INDEX IF NOT EXISTS idx_graph_edges_from_type ON graph_edges(from_id, edge_type)",
2583        [],
2584    )
2585    .map_err(storage_err)?;
2586    conn.execute(
2587        "CREATE INDEX IF NOT EXISTS idx_graph_edges_to_type ON graph_edges(to_id, edge_type)",
2588        [],
2589    )
2590    .map_err(storage_err)?;
2591    conn.execute(
2592        "CREATE INDEX IF NOT EXISTS idx_graph_edges_source_type ON graph_edges(source_type)",
2593        [],
2594    )
2595    .map_err(storage_err)?;
2596
2597    let version: i64 = conn
2598        .pragma_query_value(None, "user_version", |row| row.get(0))
2599        .map_err(storage_err)?;
2600    if version < SQLITE_GRAPH_SCHEMA_VERSION {
2601        conn.pragma_update(None, "user_version", SQLITE_GRAPH_SCHEMA_VERSION)
2602            .map_err(storage_err)?;
2603    }
2604
2605    Ok(())
2606}
2607
2608fn backfill_graph_query_columns(conn: &mut Connection) -> Result<()> {
2609    let node_rows = {
2610        let mut stmt = conn
2611            .prepare(
2612                "SELECT id, json FROM graph_nodes
2613                 WHERE COALESCE(node_type, '') = ''
2614                    OR COALESCE(file_id, '') = ''
2615                    OR COALESCE(symbol_id, '') = ''",
2616            )
2617            .map_err(storage_err)?;
2618        let rows = stmt
2619            .query_map([], |row| {
2620                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2621            })
2622            .map_err(storage_err)?;
2623        let mut rows_out = Vec::new();
2624        for row in rows {
2625            rows_out.push(row.map_err(storage_err)?);
2626        }
2627        rows_out
2628    };
2629    if !node_rows.is_empty() {
2630        let tx = conn.transaction().map_err(storage_err)?;
2631        for (id, json) in node_rows {
2632            let Ok(node) = serde_json::from_str::<GraphNode>(&json) else {
2633                continue;
2634            };
2635            tx.execute(
2636                "UPDATE graph_nodes
2637                 SET node_type = ?1,
2638                     file_id = ?2,
2639                     symbol_id = ?3,
2640                     evidence_available = ?4
2641                 WHERE id = ?5",
2642                params![
2643                    format!("{:?}", node.node_type),
2644                    node.file_id.as_ref().map(|file_id| file_id.0.as_str()),
2645                    node.symbol_id
2646                        .as_ref()
2647                        .map(|symbol_id| symbol_id.0.as_str()),
2648                    node.file_id.is_some() || node.symbol_id.is_some(),
2649                    id,
2650                ],
2651            )
2652            .map_err(storage_err)?;
2653        }
2654        tx.commit().map_err(storage_err)?;
2655    }
2656
2657    let edge_rows = {
2658        let mut stmt = conn
2659            .prepare(
2660                "SELECT id, json FROM graph_edges
2661                 WHERE COALESCE(edge_type, '') = ''
2662                    OR COALESCE(confidence, '') = ''
2663                    OR COALESCE(source_type, '') = ''
2664                    OR COALESCE(source_file, '') = ''",
2665            )
2666            .map_err(storage_err)?;
2667        let rows = stmt
2668            .query_map([], |row| {
2669                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2670            })
2671            .map_err(storage_err)?;
2672        let mut rows_out = Vec::new();
2673        for row in rows {
2674            rows_out.push(row.map_err(storage_err)?);
2675        }
2676        rows_out
2677    };
2678    if !edge_rows.is_empty() {
2679        let tx = conn.transaction().map_err(storage_err)?;
2680        for (id, json) in edge_rows {
2681            let Ok(edge) = serde_json::from_str::<GraphEdge>(&json) else {
2682                continue;
2683            };
2684            tx.execute(
2685                "UPDATE graph_edges
2686                 SET from_id = ?1,
2687                     to_id = ?2,
2688                     edge_type = ?3,
2689                     confidence = ?4,
2690                     source_type = ?5,
2691                     source_file = ?6,
2692                     evidence_available = ?7,
2693                     freshness = ?8
2694                 WHERE id = ?9",
2695                params![
2696                    edge.from.0.as_str(),
2697                    edge.to.0.as_str(),
2698                    format!("{:?}", edge.edge_type),
2699                    format!("{:?}", edge.evidence.confidence),
2700                    format!("{:?}", edge.evidence.source_type),
2701                    edge.evidence.source.as_str(),
2702                    true,
2703                    edge.evidence.indexed_at.timestamp(),
2704                    id,
2705                ],
2706            )
2707            .map_err(storage_err)?;
2708        }
2709        tx.commit().map_err(storage_err)?;
2710    }
2711
2712    Ok(())
2713}
2714
2715fn migrate_history_schema(conn: &mut Connection) -> Result<()> {
2716    ensure_supported_sqlite_schema(conn)?;
2717    let version: i64 = conn
2718        .pragma_query_value(None, "user_version", |row| row.get(0))
2719        .map_err(storage_err)?;
2720    let tx = conn.transaction().map_err(storage_err)?;
2721    tx.execute_batch(HISTORY_SCHEMA_V1).map_err(storage_err)?;
2722    if version < SQLITE_HISTORY_SCHEMA_VERSION {
2723        tx.pragma_update(None, "user_version", SQLITE_HISTORY_SCHEMA_VERSION)
2724            .map_err(storage_err)?;
2725    }
2726    tx.commit().map_err(storage_err)?;
2727    Ok(())
2728}
2729
2730fn ensure_supported_sqlite_schema(conn: &Connection) -> Result<()> {
2731    let version: i64 = conn
2732        .pragma_query_value(None, "user_version", |row| row.get(0))
2733        .map_err(storage_err)?;
2734    if version > SQLITE_SUPPORTED_SCHEMA_VERSION {
2735        return Err(OkError::Storage(format!(
2736            "sqlite schema version {version} is newer than supported version {SQLITE_SUPPORTED_SCHEMA_VERSION}"
2737        )));
2738    }
2739    Ok(())
2740}
2741
2742fn validate_history_snapshot(snapshot: &HistorySnapshot) -> Result<()> {
2743    if snapshot.schema_version != HISTORY_SCHEMA_VERSION {
2744        return Err(OkError::Storage(format!(
2745            "unsupported history snapshot schema version {}; expected {}",
2746            snapshot.schema_version, HISTORY_SCHEMA_VERSION
2747        )));
2748    }
2749
2750    let mut commit_ids = BTreeSet::new();
2751    for commit in &snapshot.commits {
2752        validate_text("commit id", &commit.id.0)?;
2753        if !commit_ids.insert(commit.id.0.clone()) {
2754            return Err(OkError::Storage(format!(
2755                "duplicate history commit id `{}`",
2756                commit.id
2757            )));
2758        }
2759        validate_text("commit author name", &commit.author.name)?;
2760        if let Some(committer) = &commit.committer {
2761            validate_text("commit committer name", &committer.name)?;
2762        }
2763        let mut parent_ids = BTreeSet::new();
2764        for parent_id in &commit.parent_ids {
2765            validate_text("parent commit id", &parent_id.0)?;
2766            if !parent_ids.insert(parent_id.0.as_str()) {
2767                return Err(OkError::Storage(format!(
2768                    "commit `{}` contains duplicate parent `{parent_id}`",
2769                    commit.id
2770                )));
2771            }
2772        }
2773    }
2774
2775    let mut file_touch_ids = BTreeSet::new();
2776    for touch in &snapshot.file_touches {
2777        validate_history_record_id(&touch.id, "file touch", &mut file_touch_ids)?;
2778        validate_commit_reference(&touch.commit_id, &commit_ids, "file touch")?;
2779        history_path(&touch.path)?;
2780        if let Some(previous_path) = &touch.previous_path {
2781            history_path(previous_path)?;
2782        }
2783    }
2784
2785    let mut symbol_touch_ids = BTreeSet::new();
2786    for touch in &snapshot.symbol_touches {
2787        validate_history_record_id(&touch.id, "symbol touch", &mut symbol_touch_ids)?;
2788        validate_commit_reference(&touch.commit_id, &commit_ids, "symbol touch")?;
2789        validate_text("symbol qualified name", &touch.qualified_name)?;
2790        history_path(&touch.file_path)?;
2791    }
2792
2793    let mut cochange_ids = BTreeSet::new();
2794    let mut cochange_pairs = BTreeSet::new();
2795    for edge in &snapshot.cochange_edges {
2796        validate_history_record_id(&edge.id, "co-change edge", &mut cochange_ids)?;
2797        let path = history_path(&edge.path)?;
2798        let cochanged_path = history_path(&edge.cochanged_path)?;
2799        if path == cochanged_path {
2800            return Err(OkError::Storage(format!(
2801                "co-change edge `{}` must connect two different paths",
2802                edge.id
2803            )));
2804        }
2805        if !cochange_pairs.insert((path.clone(), cochanged_path.clone())) {
2806            return Err(OkError::Storage(format!(
2807                "duplicate co-change edge `{path}` -> `{cochanged_path}`"
2808            )));
2809        }
2810        if edge.commit_count == 0 {
2811            return Err(OkError::Storage(format!(
2812                "co-change edge `{}` must have a positive commit count",
2813                edge.id
2814            )));
2815        }
2816        if !edge.recency_weight.is_finite() || edge.recency_weight < 0.0 {
2817            return Err(OkError::Storage(format!(
2818                "co-change edge `{}` has invalid recency weight {}",
2819                edge.id, edge.recency_weight
2820            )));
2821        }
2822        let mut sample_commits = BTreeSet::new();
2823        for commit_id in &edge.sample_commits {
2824            validate_text("sample commit id", &commit_id.0)?;
2825            if !sample_commits.insert(commit_id.0.as_str()) {
2826                return Err(OkError::Storage(format!(
2827                    "co-change edge `{}` contains duplicate sample commit `{commit_id}`",
2828                    edge.id
2829                )));
2830            }
2831        }
2832    }
2833
2834    let mut reviewer_ids = BTreeSet::new();
2835    for evidence in &snapshot.reviewer_evidence {
2836        validate_history_record_id(&evidence.id, "review event", &mut reviewer_ids)?;
2837        validate_text("reviewer name", &evidence.reviewer.name)?;
2838        validate_text("review evidence source", &evidence.source)?;
2839        if let Some(commit_id) = &evidence.commit_id {
2840            validate_text("review commit id", &commit_id.0)?;
2841        }
2842        if let Some(path) = &evidence.path {
2843            history_path(path)?;
2844        }
2845    }
2846
2847    Ok(())
2848}
2849
2850fn validate_history_record_id(
2851    id: &HistoryRecordId,
2852    kind: &str,
2853    ids: &mut BTreeSet<String>,
2854) -> Result<()> {
2855    validate_text(&format!("{kind} id"), &id.0)?;
2856    if !ids.insert(id.0.clone()) {
2857        return Err(OkError::Storage(format!("duplicate {kind} id `{id}`")));
2858    }
2859    Ok(())
2860}
2861
2862fn validate_commit_reference(
2863    commit_id: &GitCommitId,
2864    commit_ids: &BTreeSet<String>,
2865    kind: &str,
2866) -> Result<()> {
2867    validate_text("commit id", &commit_id.0)?;
2868    if !commit_ids.contains(&commit_id.0) {
2869        return Err(OkError::Storage(format!(
2870            "{kind} references missing commit `{commit_id}`"
2871        )));
2872    }
2873    Ok(())
2874}
2875
2876fn validate_text(field: &str, value: &str) -> Result<()> {
2877    if value.trim().is_empty() {
2878        return Err(OkError::Storage(format!("{field} must not be empty")));
2879    }
2880    Ok(())
2881}
2882
2883fn history_path(path: &Path) -> Result<String> {
2884    if path.as_os_str().is_empty()
2885        || path.is_absolute()
2886        || path
2887            .components()
2888            .any(|component| !matches!(component, std::path::Component::Normal(_)))
2889    {
2890        return Err(OkError::Storage(format!(
2891            "history path must be a normalized repository-relative path: {}",
2892            path.display()
2893        )));
2894    }
2895    let value = path.to_str().ok_or_else(|| {
2896        OkError::Storage(format!(
2897            "history path must be valid UTF-8: {}",
2898            path.display()
2899        ))
2900    })?;
2901    if value.contains('\\') {
2902        return Err(OkError::Storage(format!(
2903            "history path must use `/` separators: {}",
2904            path.display()
2905        )));
2906    }
2907    Ok(value.to_string())
2908}
2909
2910#[derive(Debug, Clone)]
2911struct ChurnTouchSample {
2912    id: String,
2913    touched_at: DateTime<Utc>,
2914    additions: u32,
2915    deletions: u32,
2916    confidence: Confidence,
2917    uncertainty: Vec<String>,
2918}
2919
2920fn materialize_churn_summaries(snapshot: &HistorySnapshot) -> Result<Vec<ChurnSummary>> {
2921    let Some(reference_at) = newest_history_touch(snapshot) else {
2922        return Ok(Vec::new());
2923    };
2924
2925    let mut file_samples = BTreeMap::<String, Vec<ChurnTouchSample>>::new();
2926    let mut file_aliases = Vec::<(String, String)>::new();
2927    let mut module_samples = BTreeMap::<String, BTreeMap<String, ChurnTouchSample>>::new();
2928    let mut symbol_samples = BTreeMap::<String, SymbolChurnAccumulator>::new();
2929
2930    for touch in &snapshot.file_touches {
2931        let path = history_path(&touch.path)?;
2932        let sample = ChurnTouchSample {
2933            id: touch.id.0.clone(),
2934            touched_at: touch.touched_at,
2935            additions: touch.additions.unwrap_or_default(),
2936            deletions: touch.deletions.unwrap_or_default(),
2937            confidence: Confidence::Exact,
2938            uncertainty: Vec::new(),
2939        };
2940        file_samples
2941            .entry(path.clone())
2942            .or_default()
2943            .push(sample.clone());
2944        if let Some(previous_path) = &touch.previous_path {
2945            file_aliases.push((path, history_path(previous_path)?));
2946        }
2947    }
2948    let file_samples = expand_file_churn_aliases(file_samples, file_aliases);
2949    for (path, samples) in &file_samples {
2950        for module in churn_modules_for_path(Path::new(path)) {
2951            let module_entry = module_samples.entry(module).or_default();
2952            for sample in samples {
2953                module_entry.insert(sample.id.clone(), sample.clone());
2954            }
2955        }
2956    }
2957
2958    for touch in &snapshot.symbol_touches {
2959        let Some(symbol_id) = &touch.symbol_id else {
2960            continue;
2961        };
2962        let file_path = history_path(&touch.file_path)?;
2963        let entry = symbol_samples
2964            .entry(symbol_id.0.clone())
2965            .or_insert_with(|| SymbolChurnAccumulator {
2966                file_path: PathBuf::from(&file_path),
2967                symbol_id: symbol_id.clone(),
2968                qualified_name: touch.qualified_name.clone(),
2969                samples: Vec::new(),
2970                saw_uncertainty: false,
2971            });
2972        entry.samples.push(ChurnTouchSample {
2973            id: touch.id.0.clone(),
2974            touched_at: touch.touched_at,
2975            additions: 0,
2976            deletions: 0,
2977            confidence: touch.confidence,
2978            uncertainty: touch.uncertainty.clone(),
2979        });
2980        if !touch.uncertainty.is_empty() {
2981            entry.saw_uncertainty = true;
2982        }
2983    }
2984
2985    let mut summaries = Vec::new();
2986    for (path, samples) in file_samples {
2987        summaries.push(ChurnSummary {
2988            entity_kind: ChurnEntityKind::File,
2989            key: path.clone(),
2990            path: Some(PathBuf::from(path)),
2991            symbol_id: None,
2992            qualified_name: None,
2993            generated_at: reference_at,
2994            stats: churn_stats(&samples, reference_at),
2995            confidence: Confidence::Exact,
2996            uncertainty: Vec::new(),
2997        });
2998    }
2999    for (module, samples) in module_samples {
3000        let samples = samples.into_values().collect::<Vec<_>>();
3001        summaries.push(ChurnSummary {
3002            entity_kind: ChurnEntityKind::Module,
3003            key: module.clone(),
3004            path: Some(PathBuf::from(module)),
3005            symbol_id: None,
3006            qualified_name: None,
3007            generated_at: reference_at,
3008            stats: churn_stats(&samples, reference_at),
3009            confidence: Confidence::Medium,
3010            uncertainty: vec![
3011                "module churn is aggregated from persisted file touches in this directory tree"
3012                    .into(),
3013            ],
3014        });
3015    }
3016    for (key, entry) in symbol_samples {
3017        let mut uncertainty = entry
3018            .samples
3019            .iter()
3020            .flat_map(|sample| sample.uncertainty.iter().cloned())
3021            .collect::<BTreeSet<_>>()
3022            .into_iter()
3023            .collect::<Vec<_>>();
3024        if entry.saw_uncertainty {
3025            uncertainty.push("symbol churn inherits uncertainty from line-level history".into());
3026        }
3027        summaries.push(ChurnSummary {
3028            entity_kind: ChurnEntityKind::Symbol,
3029            key,
3030            path: Some(entry.file_path),
3031            symbol_id: Some(entry.symbol_id),
3032            qualified_name: Some(entry.qualified_name),
3033            generated_at: reference_at,
3034            stats: churn_stats(&entry.samples, reference_at),
3035            confidence: minimum_churn_confidence(&entry.samples),
3036            uncertainty,
3037        });
3038    }
3039
3040    summaries.sort_by(|left, right| {
3041        left.entity_kind
3042            .cmp(&right.entity_kind)
3043            .then_with(|| {
3044                right
3045                    .stats
3046                    .hotspot_score
3047                    .total_cmp(&left.stats.hotspot_score)
3048            })
3049            .then_with(|| right.stats.touch_count.cmp(&left.stats.touch_count))
3050            .then_with(|| left.key.cmp(&right.key))
3051    });
3052    Ok(summaries)
3053}
3054
3055#[derive(Debug, Clone)]
3056struct SymbolChurnAccumulator {
3057    file_path: PathBuf,
3058    symbol_id: SymbolId,
3059    qualified_name: String,
3060    samples: Vec<ChurnTouchSample>,
3061    saw_uncertainty: bool,
3062}
3063
3064fn newest_history_touch(snapshot: &HistorySnapshot) -> Option<DateTime<Utc>> {
3065    snapshot
3066        .file_touches
3067        .iter()
3068        .map(|touch| touch.touched_at)
3069        .chain(snapshot.symbol_touches.iter().map(|touch| touch.touched_at))
3070        .max()
3071}
3072
/// Lists the module keys of every directory above `path`, nearest first.
///
/// The empty parent (the repository root for a relative path) is reported as
/// `"__root__"`. A path without any parent yields just `"__root__"`.
fn churn_modules_for_path(path: &Path) -> Vec<String> {
    // `ancestors` starts with the path itself; everything after it is the parent chain.
    let mut modules: Vec<String> = path
        .ancestors()
        .skip(1)
        .map(|directory| {
            if directory.as_os_str().is_empty() {
                "__root__".to_string()
            } else {
                directory.to_string_lossy().into_owned()
            }
        })
        .collect();
    if modules.is_empty() {
        modules.push("__root__".to_string());
    }
    modules
}
3090
3091fn expand_file_churn_aliases(
3092    samples: BTreeMap<String, Vec<ChurnTouchSample>>,
3093    aliases: Vec<(String, String)>,
3094) -> BTreeMap<String, Vec<ChurnTouchSample>> {
3095    if aliases.is_empty() {
3096        return samples;
3097    }
3098
3099    let mut groups = samples
3100        .keys()
3101        .map(|path| BTreeSet::from([path.clone()]))
3102        .collect::<Vec<_>>();
3103    for (path, previous_path) in aliases {
3104        merge_file_alias_group(&mut groups, path, previous_path);
3105    }
3106
3107    let mut expanded = BTreeMap::new();
3108    for group in groups {
3109        let mut combined = Vec::new();
3110        for path in &group {
3111            if let Some(path_samples) = samples.get(path) {
3112                combined.extend(path_samples.clone());
3113            }
3114        }
3115        if combined.is_empty() {
3116            continue;
3117        }
3118        for path in group {
3119            expanded.insert(path, combined.clone());
3120        }
3121    }
3122    expanded
3123}
3124
/// Records that `path` and `previous_path` name the same file.
///
/// If both paths already belong to different groups, the group with the higher
/// index is removed and merged into the one with the lower index. If only one of
/// them is known, the other joins its group. If neither is known, a new group
/// holding both is appended.
fn merge_file_alias_group(groups: &mut Vec<BTreeSet<String>>, path: String, previous_path: String) {
    let group_of = |name: &String| groups.iter().position(|members| members.contains(name));
    let path_group = group_of(&path);
    let previous_group = group_of(&previous_path);

    match (path_group, previous_group) {
        (Some(first), Some(second)) => {
            if first != second {
                // Removing the higher index keeps the lower index valid.
                let absorbed = groups.remove(first.max(second));
                groups[first.min(second)].extend(absorbed);
            }
        }
        (Some(index), None) => {
            groups[index].insert(previous_path);
        }
        (None, Some(index)) => {
            groups[index].insert(path);
        }
        (None, None) => groups.push(BTreeSet::from([path, previous_path])),
    }
}
3152
3153fn churn_stats(samples: &[ChurnTouchSample], reference_at: DateTime<Utc>) -> ChurnStats {
3154    let mut last_30d = 0;
3155    let mut last_90d = 0;
3156    let mut recency_weighted = 0.0_f32;
3157    let mut churn_volume = 0_u64;
3158
3159    for sample in samples {
3160        let age_seconds = reference_at
3161            .signed_duration_since(sample.touched_at)
3162            .num_seconds()
3163            .max(0) as f32;
3164        let age_days = age_seconds / 86_400.0;
3165        if age_days <= 30.0 {
3166            last_30d += 1;
3167        }
3168        if age_days <= 90.0 {
3169            last_90d += 1;
3170        }
3171        recency_weighted += 1.0 / (1.0 + age_days / 30.0);
3172        churn_volume += u64::from(sample.additions) + u64::from(sample.deletions);
3173    }
3174
3175    let touch_count = samples.len();
3176    let hotspot_score =
3177        recency_weighted * (touch_count as f32).ln_1p() + (churn_volume as f32).ln_1p() / 10.0;
3178    ChurnStats {
3179        all_time: touch_count,
3180        last_30d,
3181        last_90d,
3182        recency_weighted,
3183        touch_count,
3184        hotspot_score,
3185    }
3186}
3187
3188fn minimum_churn_confidence(samples: &[ChurnTouchSample]) -> Confidence {
3189    samples
3190        .iter()
3191        .map(|sample| sample.confidence)
3192        .min_by_key(|confidence| confidence_rank(*confidence))
3193        .unwrap_or(Confidence::Low)
3194}
3195
3196fn confidence_rank(confidence: Confidence) -> u8 {
3197    match confidence {
3198        Confidence::Low => 0,
3199        Confidence::Medium => 1,
3200        Confidence::High => 2,
3201        Confidence::Exact => 3,
3202    }
3203}
3204
3205fn churn_entity_kind_key(kind: ChurnEntityKind) -> &'static str {
3206    match kind {
3207        ChurnEntityKind::File => "file",
3208        ChurnEntityKind::Module => "module",
3209        ChurnEntityKind::Symbol => "symbol",
3210    }
3211}
3212
3213fn usize_to_i64(value: usize, field: &str) -> Result<i64> {
3214    i64::try_from(value)
3215        .map_err(|_| OkError::Storage(format!("{field} exceeds SQLite integer range")))
3216}
3217
/// Returns `limit + 1` as an `i64`, saturating instead of overflowing and
/// capped at `i64::MAX`.
///
/// NOTE(review): the extra row presumably lets callers detect truncation by
/// fetching one more row than requested — confirm against the call sites.
fn history_query_limit(limit: usize) -> i64 {
    let with_extra_row = limit.saturating_add(1);
    i64::try_from(with_extra_row).unwrap_or(i64::MAX)
}
3221
3222fn collect_limited_json<T, F>(
3223    rows: rusqlite::MappedRows<'_, F>,
3224    limit: usize,
3225) -> Result<(Vec<T>, bool)>
3226where
3227    F: FnMut(&rusqlite::Row<'_>) -> rusqlite::Result<String>,
3228    T: serde::de::DeserializeOwned,
3229{
3230    let mut values = collect_json(rows)?;
3231    let truncated = values.len() > limit;
3232    values.truncate(limit);
3233    Ok((values, truncated))
3234}
3235
3236fn collect_json<T, F>(rows: rusqlite::MappedRows<'_, F>) -> Result<Vec<T>>
3237where
3238    F: FnMut(&rusqlite::Row<'_>) -> rusqlite::Result<String>,
3239    T: serde::de::DeserializeOwned,
3240{
3241    let mut out = Vec::new();
3242    for row in rows {
3243        let raw = row.map_err(storage_err)?;
3244        out.push(serde_json::from_str(&raw)?);
3245    }
3246    Ok(out)
3247}
3248
3249fn graph_node_by_id(conn: &Connection, id: &str) -> Result<Option<GraphNode>> {
3250    let raw: Option<String> = conn
3251        .query_row(
3252            "SELECT json FROM graph_nodes WHERE id = ?1",
3253            params![id],
3254            |row| row.get(0),
3255        )
3256        .optional()
3257        .map_err(storage_err)?;
3258    raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
3259        .transpose()
3260}
3261
3262fn storage_err(err: rusqlite::Error) -> OkError {
3263    OkError::Storage(err.to_string())
3264}
3265
3266fn occurrence_id(file_id: &str, value: &str, line: Option<u32>, flag: bool) -> String {
3267    use sha2::{Digest, Sha256};
3268    let mut hasher = Sha256::new();
3269    hasher.update(file_id.as_bytes());
3270    hasher.update(b":");
3271    hasher.update(value.as_bytes());
3272    hasher.update(b":");
3273    hasher.update(line.unwrap_or_default().to_string().as_bytes());
3274    hasher.update(b":");
3275    hasher.update(if flag { b"1" } else { b"0" });
3276    format!("{:x}", hasher.finalize())
3277}
3278
3279fn source_type_name(source_type: &EvidenceSourceType) -> &'static str {
3280    match source_type {
3281        EvidenceSourceType::TreeSitter => "tree_sitter",
3282        EvidenceSourceType::Scip => "scip",
3283        EvidenceSourceType::Lsp => "lsp",
3284        EvidenceSourceType::Regex => "regex",
3285        EvidenceSourceType::Lexical => "lexical",
3286        EvidenceSourceType::Semantic => "semantic",
3287        EvidenceSourceType::Runtime => "runtime",
3288        EvidenceSourceType::GitHistory => "git_history",
3289        EvidenceSourceType::StaticAnalysis => "static_analysis",
3290        EvidenceSourceType::ExternalIntegration => "external_integration",
3291        EvidenceSourceType::Heuristic => "heuristic",
3292    }
3293}
3294
3295#[cfg(test)]
3296mod tests {
3297    use super::{SqliteStore, SQLITE_GRAPH_SCHEMA_VERSION};
3298    use chrono::{TimeZone, Utc};
3299    use open_kioku_core::{
3300        AnalysisFact, ChurnEntityKind, CodeChunk, Confidence, EdgeId, Evidence, EvidenceId,
3301        EvidenceSourceType, File, FileId, GitChangeKind, GitCochangeEdge, GitCommitId,
3302        GitCommitRecord, GitFileTouch, GitSymbolTouch, GraphEdge, GraphEdgeType, GraphNode,
3303        GraphNodeType, HistoryRecordId, HistorySignalQuery, HistorySnapshot, IndexManifest,
3304        IndexQuality, Language, LineRange, NodeId, Owner, Repository, RepositoryId,
3305        ReviewerEvidence, ReviewerRole, SimilarChangeQuery, SimilarityEvidenceSource, Symbol,
3306        SymbolId, SymbolKind, SymbolOccurrence, HISTORY_SCHEMA_VERSION,
3307    };
3308    use open_kioku_storage::{
3309        GraphStore, HistoryStore, IndexData, MetadataStore, PartialIndexUpdate,
3310    };
3311    use rusqlite::{params, Connection};
3312    use std::collections::{BTreeMap, BTreeSet};
3313    use std::time::Duration;
3314
3315    fn make_store() -> SqliteStore {
3316        SqliteStore::open(":memory:").expect("in-memory store")
3317    }
3318
3319    fn make_file(id: &str, path: &str) -> File {
3320        File {
3321            id: FileId::new(id),
3322            repository_id: RepositoryId::new("repo"),
3323            path: path.into(),
3324            language: Language::Rust,
3325            size_bytes: 100,
3326            content_hash: format!("hash-{id}"),
3327            is_generated: false,
3328            is_vendor: false,
3329        }
3330    }
3331
3332    fn make_symbol(id: &str, name: &str, file_id: &str) -> Symbol {
3333        Symbol {
3334            id: SymbolId::new(id),
3335            name: name.into(),
3336            qualified_name: format!("module::{name}"),
3337            kind: SymbolKind::Function,
3338            file_id: FileId::new(file_id),
3339            range: Some(LineRange::single(1)),
3340            language: Language::Rust,
3341            confidence: Confidence::High,
3342            provenance: EvidenceSourceType::TreeSitter,
3343        }
3344    }
3345
3346    fn evidence() -> Evidence {
3347        Evidence {
3348            id: EvidenceId::new("ev-1"),
3349            source: "test".into(),
3350            source_type: EvidenceSourceType::Lexical,
3351            file_range: None,
3352            symbol_id: None,
3353            confidence: Confidence::Medium,
3354            message: "test evidence".into(),
3355            indexed_at: Utc::now(),
3356            ..Default::default()
3357        }
3358    }
3359
3360    fn make_manifest() -> IndexManifest {
3361        IndexManifest {
3362            repository: Repository {
3363                id: RepositoryId::new("repo"),
3364                name: "repo".into(),
3365                root: std::path::PathBuf::from("."),
3366                branch: None,
3367                commit: None,
3368                indexed_at: None,
3369            },
3370            file_count: 2,
3371            symbol_count: 2,
3372            chunk_count: 0,
3373            indexed_at: Utc::now(),
3374            schema_version: 1,
3375            index_mode: Default::default(),
3376            phase_reports: Vec::new(),
3377            quality: IndexQuality::default(),
3378        }
3379    }
3380
3381    fn history_snapshot() -> HistorySnapshot {
3382        let older_at = Utc.with_ymd_and_hms(2026, 5, 1, 12, 0, 0).unwrap();
3383        let newer_at = Utc.with_ymd_and_hms(2026, 6, 1, 12, 0, 0).unwrap();
3384        let older_id = GitCommitId::new("older");
3385        let newer_id = GitCommitId::new("newer");
3386        HistorySnapshot {
3387            schema_version: HISTORY_SCHEMA_VERSION,
3388            commits: vec![
3389                GitCommitRecord {
3390                    id: older_id.clone(),
3391                    parent_ids: Vec::new(),
3392                    author: Owner {
3393                        name: "Older Author".into(),
3394                        email: Some("older@example.com".into()),
3395                    },
3396                    committer: None,
3397                    authored_at: older_at,
3398                    committed_at: older_at,
3399                    summary: "Introduce library".into(),
3400                    message: "Introduce library".into(),
3401                    file_count: 2,
3402                },
3403                GitCommitRecord {
3404                    id: newer_id.clone(),
3405                    parent_ids: vec![older_id.clone()],
3406                    author: Owner {
3407                        name: "Newer Author".into(),
3408                        email: Some("newer@example.com".into()),
3409                    },
3410                    committer: None,
3411                    authored_at: newer_at,
3412                    committed_at: newer_at,
3413                    summary: "Refine library".into(),
3414                    message: "Refine library and tests".into(),
3415                    file_count: 3,
3416                },
3417            ],
3418            file_touches: vec![
3419                GitFileTouch {
3420                    id: HistoryRecordId::new("file-touch-older"),
3421                    commit_id: older_id.clone(),
3422                    path: "src/lib.rs".into(),
3423                    previous_path: None,
3424                    change_kind: GitChangeKind::Added,
3425                    additions: Some(20),
3426                    deletions: Some(0),
3427                    touched_at: older_at,
3428                },
3429                GitFileTouch {
3430                    id: HistoryRecordId::new("file-touch-newer"),
3431                    commit_id: newer_id.clone(),
3432                    path: "src/lib.rs".into(),
3433                    previous_path: None,
3434                    change_kind: GitChangeKind::Modified,
3435                    additions: Some(5),
3436                    deletions: Some(2),
3437                    touched_at: newer_at,
3438                },
3439            ],
3440            symbol_touches: vec![GitSymbolTouch {
3441                id: HistoryRecordId::new("symbol-touch-newer"),
3442                commit_id: newer_id.clone(),
3443                symbol_id: Some(SymbolId::new("symbol-1")),
3444                qualified_name: "crate::history_for_file".into(),
3445                file_path: "src/lib.rs".into(),
3446                change_kind: GitChangeKind::Modified,
3447                line_ranges: vec![LineRange { start: 4, end: 8 }],
3448                confidence: Confidence::Medium,
3449                uncertainty: vec!["historical coordinates may have shifted".into()],
3450                touched_at: newer_at,
3451            }],
3452            cochange_edges: vec![
3453                GitCochangeEdge {
3454                    id: HistoryRecordId::new("cochange-test"),
3455                    path: "src/lib.rs".into(),
3456                    cochanged_path: "tests/lib_test.rs".into(),
3457                    commit_count: 2,
3458                    recency_weight: 1.8,
3459                    last_changed_at: Some(newer_at),
3460                    sample_commits: vec![newer_id.clone(), older_id.clone()],
3461                    test_corun: true,
3462                },
3463                GitCochangeEdge {
3464                    id: HistoryRecordId::new("cochange-docs"),
3465                    path: "src/lib.rs".into(),
3466                    cochanged_path: "docs/library.md".into(),
3467                    commit_count: 1,
3468                    recency_weight: 0.5,
3469                    last_changed_at: Some(older_at),
3470                    sample_commits: vec![older_id],
3471                    test_corun: false,
3472                },
3473            ],
3474            reviewer_evidence: vec![ReviewerEvidence {
3475                id: HistoryRecordId::new("review-newer"),
3476                commit_id: Some(newer_id),
3477                path: None,
3478                reviewer: Owner {
3479                    name: "Reviewer".into(),
3480                    email: Some("reviewer@example.com".into()),
3481                },
3482                role: ReviewerRole::Reviewer,
3483                observed_at: newer_at,
3484                source: "git-trailer:reviewed-by".into(),
3485                confidence: Confidence::High,
3486            }],
3487        }
3488    }
3489
3490    fn similar_history_snapshot() -> HistorySnapshot {
3491        let intro_at = Utc.with_ymd_and_hms(2026, 6, 1, 12, 0, 0).unwrap();
3492        let target_at = Utc.with_ymd_and_hms(2026, 6, 2, 12, 0, 0).unwrap();
3493        let move_at = Utc.with_ymd_and_hms(2026, 6, 3, 12, 0, 0).unwrap();
3494        let docs_at = Utc.with_ymd_and_hms(2026, 6, 4, 12, 0, 0).unwrap();
3495        let intro_id = GitCommitId::new("auth-intro");
3496        let target_id = GitCommitId::new("auth-expiry-fix");
3497        let move_id = GitCommitId::new("auth-module-move");
3498        let docs_id = GitCommitId::new("token-docs");
3499
3500        HistorySnapshot {
3501            schema_version: HISTORY_SCHEMA_VERSION,
3502            commits: vec![
3503                GitCommitRecord {
3504                    id: intro_id.clone(),
3505                    parent_ids: Vec::new(),
3506                    author: Owner {
3507                        name: "Auth Dev".into(),
3508                        email: Some("auth@example.com".into()),
3509                    },
3510                    committer: None,
3511                    authored_at: intro_at,
3512                    committed_at: intro_at,
3513                    summary: "Add login token validation".into(),
3514                    message: "Add token validation for login requests".into(),
3515                    file_count: 1,
3516                },
3517                GitCommitRecord {
3518                    id: target_id.clone(),
3519                    parent_ids: vec![intro_id.clone()],
3520                    author: Owner {
3521                        name: "Auth Dev".into(),
3522                        email: Some("auth@example.com".into()),
3523                    },
3524                    committer: None,
3525                    authored_at: target_at,
3526                    committed_at: target_at,
3527                    summary: "Fix token expiration in login flow".into(),
3528                    message:
3529                        "Fix login token expiration by updating auth validation and auth tests"
3530                            .into(),
3531                    file_count: 2,
3532                },
3533                GitCommitRecord {
3534                    id: move_id.clone(),
3535                    parent_ids: vec![target_id.clone()],
3536                    author: Owner {
3537                        name: "Platform Dev".into(),
3538                        email: Some("platform@example.com".into()),
3539                    },
3540                    committer: None,
3541                    authored_at: move_at,
3542                    committed_at: move_at,
3543                    summary: "Move auth module".into(),
3544                    message: "Move auth module without behavior changes".into(),
3545                    file_count: 1,
3546                },
3547                GitCommitRecord {
3548                    id: docs_id.clone(),
3549                    parent_ids: vec![move_id.clone()],
3550                    author: Owner {
3551                        name: "Docs Dev".into(),
3552                        email: Some("docs@example.com".into()),
3553                    },
3554                    committer: None,
3555                    authored_at: docs_at,
3556                    committed_at: docs_at,
3557                    summary: "Update token glossary".into(),
3558                    message: "Refresh token wording in docs".into(),
3559                    file_count: 1,
3560                },
3561            ],
3562            file_touches: vec![
3563                GitFileTouch {
3564                    id: HistoryRecordId::new("intro-auth"),
3565                    commit_id: intro_id.clone(),
3566                    path: "src/auth.rs".into(),
3567                    previous_path: None,
3568                    change_kind: GitChangeKind::Added,
3569                    additions: Some(40),
3570                    deletions: Some(0),
3571                    touched_at: intro_at,
3572                },
3573                GitFileTouch {
3574                    id: HistoryRecordId::new("target-auth"),
3575                    commit_id: target_id.clone(),
3576                    path: "src/auth.rs".into(),
3577                    previous_path: None,
3578                    change_kind: GitChangeKind::Modified,
3579                    additions: Some(12),
3580                    deletions: Some(3),
3581                    touched_at: target_at,
3582                },
3583                GitFileTouch {
3584                    id: HistoryRecordId::new("target-tests"),
3585                    commit_id: target_id.clone(),
3586                    path: "tests/auth_flow.rs".into(),
3587                    previous_path: None,
3588                    change_kind: GitChangeKind::Modified,
3589                    additions: Some(18),
3590                    deletions: Some(1),
3591                    touched_at: target_at,
3592                },
3593                GitFileTouch {
3594                    id: HistoryRecordId::new("move-auth"),
3595                    commit_id: move_id.clone(),
3596                    path: "src/auth.rs".into(),
3597                    previous_path: None,
3598                    change_kind: GitChangeKind::Modified,
3599                    additions: Some(3),
3600                    deletions: Some(3),
3601                    touched_at: move_at,
3602                },
3603                GitFileTouch {
3604                    id: HistoryRecordId::new("docs-token"),
3605                    commit_id: docs_id.clone(),
3606                    path: "docs/tokens.md".into(),
3607                    previous_path: None,
3608                    change_kind: GitChangeKind::Modified,
3609                    additions: Some(5),
3610                    deletions: Some(1),
3611                    touched_at: docs_at,
3612                },
3613            ],
3614            symbol_touches: vec![GitSymbolTouch {
3615                id: HistoryRecordId::new("target-symbol"),
3616                commit_id: target_id.clone(),
3617                symbol_id: Some(SymbolId::new("auth-validate-token")),
3618                qualified_name: "crate::auth::validate_token".into(),
3619                file_path: "src/auth.rs".into(),
3620                change_kind: GitChangeKind::Modified,
3621                line_ranges: vec![LineRange { start: 10, end: 18 }],
3622                confidence: Confidence::Medium,
3623                uncertainty: Vec::new(),
3624                touched_at: target_at,
3625            }],
3626            cochange_edges: vec![GitCochangeEdge {
3627                id: HistoryRecordId::new("auth-tests-cochange"),
3628                path: "src/auth.rs".into(),
3629                cochanged_path: "tests/auth_flow.rs".into(),
3630                commit_count: 2,
3631                recency_weight: 1.9,
3632                last_changed_at: Some(target_at),
3633                sample_commits: vec![target_id],
3634                test_corun: true,
3635            }],
3636            reviewer_evidence: Vec::new(),
3637        }
3638    }
3639
3640    #[test]
3641    fn history_migration_upgrades_legacy_database_idempotently() {
3642        let dir = tempfile::tempdir().unwrap();
3643        let path = dir.path().join("index.sqlite");
3644        let legacy = Connection::open(&path).unwrap();
3645        legacy
3646            .execute_batch(
3647                r#"
3648                PRAGMA user_version = 0;
3649                CREATE TABLE analysis_facts (
3650                  id TEXT PRIMARY KEY,
3651                  file_id TEXT NOT NULL,
3652                  source_type TEXT NOT NULL,
3653                  target TEXT NOT NULL,
3654                  json TEXT NOT NULL
3655                );
3656                INSERT INTO analysis_facts(id, file_id, source_type, target, json)
3657                VALUES('legacy-git', 'f1', 'git_history', 'tests/lib_test.rs', '{}');
3658                "#,
3659            )
3660            .unwrap();
3661        drop(legacy);
3662
3663        let store = SqliteStore::open(&path).unwrap();
3664        store.initialize().unwrap();
3665
3666        let conn = store.connection.lock().unwrap();
3667        let version: i64 = conn
3668            .pragma_query_value(None, "user_version", |row| row.get(0))
3669            .unwrap();
3670        assert_eq!(version, SQLITE_GRAPH_SCHEMA_VERSION);
3671        let history_table_count: i64 = conn
3672            .query_row(
3673                "SELECT COUNT(*) FROM sqlite_master
3674                 WHERE type = 'table'
3675                   AND name IN (
3676                     'git_commits',
3677                     'git_file_touches',
3678                     'git_symbol_touches',
3679                     'git_cochange_edges',
3680                     'git_review_events',
3681                     'history_hotspots'
3682                   )",
3683                [],
3684                |row| row.get(0),
3685            )
3686            .unwrap();
3687        assert_eq!(history_table_count, 6);
3688        let legacy_fact_count: i64 = conn
3689            .query_row("SELECT COUNT(*) FROM analysis_facts", [], |row| row.get(0))
3690            .unwrap();
3691        assert_eq!(legacy_fact_count, 1);
3692    }
3693
3694    #[test]
3695    fn newer_sqlite_schema_is_rejected_without_mutation() {
3696        let dir = tempfile::tempdir().unwrap();
3697        let path = dir.path().join("future.sqlite");
3698        let future = Connection::open(&path).unwrap();
3699        future
3700            .execute_batch(
3701                r#"
3702                PRAGMA user_version = 3;
3703                CREATE TABLE future_history_marker (id INTEGER PRIMARY KEY);
3704                "#,
3705            )
3706            .unwrap();
3707        drop(future);
3708
3709        let error = match SqliteStore::open(&path) {
3710            Ok(_) => panic!("newer schema should be rejected"),
3711            Err(error) => error.to_string(),
3712        };
3713        assert!(error.contains("newer than supported version 2"));
3714
3715        let conn = Connection::open(&path).unwrap();
3716        let current_table_count: i64 = conn
3717            .query_row(
3718                "SELECT COUNT(*) FROM sqlite_master WHERE type = 'table' AND name = 'manifests'",
3719                [],
3720                |row| row.get(0),
3721            )
3722            .unwrap();
3723        assert_eq!(current_table_count, 0);
3724        let future_marker_count: i64 = conn
3725            .query_row(
3726                "SELECT COUNT(*) FROM sqlite_master WHERE type = 'table' AND name = 'future_history_marker'",
3727                [],
3728                |row| row.get(0),
3729            )
3730            .unwrap();
3731        assert_eq!(future_marker_count, 1);
3732    }
3733
3734    #[test]
3735    fn history_snapshot_queries_return_typed_evidence() {
3736        let store = make_store();
3737        store.put_history_snapshot(&history_snapshot()).unwrap();
3738
3739        let recent = store.recent_commits(10).unwrap();
3740        assert_eq!(recent.len(), 2);
3741        assert_eq!(recent[0].id.0, "newer");
3742
3743        let neighbors = store
3744            .cochange_neighbors(std::path::Path::new("src/lib.rs"), 10)
3745            .unwrap();
3746        assert_eq!(neighbors.len(), 2);
3747        assert_eq!(
3748            neighbors[0].cochanged_path,
3749            std::path::Path::new("tests/lib_test.rs")
3750        );
3751
3752        let summary = store
3753            .history_for_file(std::path::Path::new("src/lib.rs"), 10)
3754            .unwrap();
3755        assert_eq!(summary.recent_commits.len(), 2);
3756        assert_eq!(summary.file_touches.len(), 2);
3757        assert_eq!(summary.symbol_touches.len(), 1);
3758        assert_eq!(summary.cochange_neighbors.len(), 2);
3759        assert_eq!(summary.reviewer_evidence.len(), 1);
3760        assert!(!summary.truncated);
3761        assert!(summary.uncertainty.is_empty());
3762
3763        let truncated = store
3764            .history_for_file(std::path::Path::new("src/lib.rs"), 1)
3765            .unwrap();
3766        assert!(truncated.truncated);
3767        assert!(truncated
3768            .uncertainty
3769            .iter()
3770            .any(|note| note.contains("truncated")));
3771    }
3772
3773    #[test]
3774    fn similar_changes_rank_and_explain_multi_signal_history() {
3775        let store = make_store();
3776        store
3777            .put_history_snapshot(&similar_history_snapshot())
3778            .unwrap();
3779
3780        let report = store
3781            .similar_changes(
3782                &SimilarChangeQuery {
3783                    task: Some("fix token expiration".into()),
3784                    paths: vec!["src/auth.rs".into()],
3785                    symbols: vec!["validate_token".into()],
3786                },
3787                5,
3788            )
3789            .unwrap();
3790
3791        assert!(!report.truncated);
3792        assert_eq!(report.hits[0].change.commit.id.0, "auth-expiry-fix");
3793        assert!(report.hits[0].score > 0.90, "{:#?}", report.hits[0]);
3794        assert_eq!(report.hits[0].confidence, Confidence::High);
3795        let source_types = report.hits[0]
3796            .evidence
3797            .iter()
3798            .map(|evidence| evidence.source_type)
3799            .collect::<BTreeSet<_>>();
3800        assert!(source_types.contains(&SimilarityEvidenceSource::TaskText));
3801        assert!(source_types.contains(&SimilarityEvidenceSource::CommitMetadata));
3802        assert!(source_types.contains(&SimilarityEvidenceSource::Path));
3803        assert!(source_types.contains(&SimilarityEvidenceSource::Symbol));
3804        assert!(source_types.contains(&SimilarityEvidenceSource::Cochange));
3805        assert!(source_types.contains(&SimilarityEvidenceSource::Churn));
3806
3807        let weak = report
3808            .hits
3809            .iter()
3810            .find(|hit| hit.change.commit.id.0 == "token-docs")
3811            .expect("weak task-text hit should still be visible");
3812        assert_eq!(weak.confidence, Confidence::Low);
3813        assert!(weak
3814            .uncertainty
3815            .iter()
3816            .any(|note| note.contains("low-confidence")));
3817    }
3818
3819    #[test]
3820    fn history_score_components_are_bounded_and_named() {
3821        let store = make_store();
3822        store.put_history_snapshot(&history_snapshot()).unwrap();
3823
3824        let summary = store
3825            .history_score_components(
3826                &HistorySignalQuery {
3827                    path: "src/lib.rs".into(),
3828                    task: Some("update lib history behavior".into()),
3829                    symbols: vec!["crate::history_for_file".into()],
3830                },
3831                10,
3832            )
3833            .unwrap();
3834
3835        let signals = summary
3836            .components
3837            .iter()
3838            .map(|component| component.signal.as_str())
3839            .collect::<BTreeSet<_>>();
3840        assert!(signals.contains("history_churn"), "{summary:#?}");
3841        assert!(signals.contains("similar_change_overlap"), "{summary:#?}");
3842        assert!(signals.contains("reviewer_affinity"), "{summary:#?}");
3843        assert!(summary
3844            .components
3845            .iter()
3846            .all(|component| component.contribution <= 0.18));
3847        assert!(!summary.evidence_refs.is_empty());
3848        assert!(summary.reasons.iter().any(|reason| {
3849            reason.contains("history churn") || reason.contains("similar change")
3850        }));
3851    }
3852
3853    #[test]
3854    fn similar_changes_limit_is_deterministic_and_reports_truncation() {
3855        let store = make_store();
3856        store
3857            .put_history_snapshot(&similar_history_snapshot())
3858            .unwrap();
3859
3860        let report = store
3861            .similar_changes(
3862                &SimilarChangeQuery {
3863                    task: Some("fix token expiration".into()),
3864                    paths: vec!["src/auth.rs".into()],
3865                    symbols: vec!["validate_token".into()],
3866                },
3867                1,
3868            )
3869            .unwrap();
3870
3871        assert!(report.truncated);
3872        assert_eq!(report.hits.len(), 1);
3873        assert_eq!(report.hits[0].change.commit.id.0, "auth-expiry-fix");
3874        assert!(report
3875            .uncertainty
3876            .iter()
3877            .any(|note| note.contains("truncated to 1")));
3878    }
3879
3880    #[test]
3881    fn churn_summaries_are_materialized_with_deterministic_windows() {
3882        let store = make_store();
3883        store.put_history_snapshot(&history_snapshot()).unwrap();
3884
3885        let file = store
3886            .churn_for_file(std::path::Path::new("src/lib.rs"))
3887            .unwrap();
3888        assert_eq!(file.entity_kind, ChurnEntityKind::File);
3889        assert_eq!(file.stats.all_time, 2);
3890        assert_eq!(file.stats.last_30d, 1);
3891        assert_eq!(file.stats.last_90d, 2);
3892        assert_eq!(file.stats.touch_count, 2);
3893        assert!(file.stats.recency_weighted > 1.4);
3894        assert!(file.stats.hotspot_score > file.stats.recency_weighted);
3895        assert_eq!(
3896            file.generated_at,
3897            Utc.with_ymd_and_hms(2026, 6, 1, 12, 0, 0).unwrap()
3898        );
3899        assert_eq!(file.confidence, Confidence::Exact);
3900
3901        let module = store.churn_for_module(std::path::Path::new("src")).unwrap();
3902        assert_eq!(module.entity_kind, ChurnEntityKind::Module);
3903        assert_eq!(module.stats.all_time, 2);
3904        assert_eq!(module.stats.last_30d, 1);
3905        assert_eq!(module.confidence, Confidence::Medium);
3906        assert!(module
3907            .uncertainty
3908            .iter()
3909            .any(|note| note.contains("aggregated from persisted file touches")));
3910
3911        let symbol_id = SymbolId::new("symbol-1");
3912        let symbol = store.churn_for_symbol(&symbol_id).unwrap();
3913        assert_eq!(symbol.entity_kind, ChurnEntityKind::Symbol);
3914        assert_eq!(symbol.stats.all_time, 1);
3915        assert_eq!(symbol.stats.last_30d, 1);
3916        assert_eq!(symbol.stats.last_90d, 1);
3917        assert_eq!(symbol.confidence, Confidence::Medium);
3918        assert_eq!(
3919            symbol.qualified_name.as_deref(),
3920            Some("crate::history_for_file")
3921        );
3922        assert!(symbol
3923            .uncertainty
3924            .iter()
3925            .any(|note| note.contains("historical coordinates may have shifted")));
3926
3927        let missing = store
3928            .churn_for_symbol(&SymbolId::new("missing-symbol"))
3929            .unwrap();
3930        assert_eq!(missing.stats.touch_count, 0);
3931        assert_eq!(missing.confidence, Confidence::Low);
3932        assert!(missing
3933            .uncertainty
3934            .iter()
3935            .any(|note| note.contains("no persisted symbol-level churn")));
3936    }
3937
3938    #[test]
3939    fn hotspot_ordering_and_lookup_use_persisted_summary_table() {
3940        let store = make_store();
3941        let mut snapshot = history_snapshot();
3942        snapshot.file_touches.push(GitFileTouch {
3943            id: HistoryRecordId::new("file-touch-docs"),
3944            commit_id: GitCommitId::new("older"),
3945            path: "docs/readme.md".into(),
3946            previous_path: None,
3947            change_kind: GitChangeKind::Modified,
3948            additions: Some(1),
3949            deletions: Some(0),
3950            touched_at: Utc.with_ymd_and_hms(2026, 5, 1, 12, 0, 0).unwrap(),
3951        });
3952        store.put_history_snapshot(&snapshot).unwrap();
3953
3954        let conn = store.connection.lock().unwrap();
3955        let mut stmt = conn
3956            .prepare(
3957                "SELECT entity_key FROM history_hotspots
3958                 WHERE entity_kind = 'file'
3959                 ORDER BY hotspot_score DESC, touch_count DESC, entity_key
3960                 LIMIT 2",
3961            )
3962            .unwrap();
3963        let rows = stmt
3964            .query_map([], |row| row.get::<_, String>(0))
3965            .unwrap()
3966            .collect::<std::result::Result<Vec<_>, _>>()
3967            .unwrap();
3968        drop(stmt);
3969        drop(conn);
3970        assert_eq!(rows, vec!["src/lib.rs", "docs/readme.md"]);
3971
3972        let mut elapsed = Vec::new();
3973        for _ in 0..40 {
3974            let started = std::time::Instant::now();
3975            let summary = store
3976                .churn_for_file(std::path::Path::new("src/lib.rs"))
3977                .unwrap();
3978            assert_eq!(summary.stats.touch_count, 2);
3979            elapsed.push(started.elapsed());
3980        }
3981        elapsed.sort();
3982        let p95 = elapsed[(elapsed.len() * 95 / 100).min(elapsed.len() - 1)];
3983        assert!(
3984            p95 < Duration::from_millis(200),
3985            "persisted churn lookup p95 was {p95:?}"
3986        );
3987    }
3988
3989    #[test]
3990    fn churn_summaries_follow_rename_aliases_without_module_double_counting() {
3991        let store = make_store();
3992        let mut snapshot = history_snapshot();
3993        snapshot.file_touches[0].path = "src/old.rs".into();
3994        snapshot.file_touches[1].previous_path = Some("src/old.rs".into());
3995        snapshot.file_touches[1].change_kind = GitChangeKind::Renamed;
3996        store.put_history_snapshot(&snapshot).unwrap();
3997
3998        let current = store
3999            .churn_for_file(std::path::Path::new("src/lib.rs"))
4000            .unwrap();
4001        let historical = store
4002            .churn_for_file(std::path::Path::new("src/old.rs"))
4003            .unwrap();
4004        assert_eq!(current.stats.all_time, 2);
4005        assert_eq!(historical.stats.all_time, 2);
4006        assert_eq!(current.stats.last_30d, 1);
4007        assert_eq!(historical.stats.last_30d, 1);
4008
4009        let module = store.churn_for_module(std::path::Path::new("src")).unwrap();
4010        assert_eq!(module.stats.all_time, 2);
4011        assert_eq!(module.stats.last_90d, 2);
4012
4013        let root = store.churn_for_module(std::path::Path::new(".")).unwrap();
4014        assert_eq!(root.key, "__root__");
4015        assert_eq!(root.stats.all_time, 2);
4016    }
4017
4018    #[test]
4019    fn provenance_queries_return_first_last_and_explicit_symbol_uncertainty() {
4020        let store = make_store();
4021        let file = make_file("file-1", "src/lib.rs");
4022        let symbol = make_symbol("symbol-1", "history_for_file", "file-1");
4023        let mut unmapped_symbol = make_symbol("symbol-2", "unmapped", "file-1");
4024        unmapped_symbol.range = None;
4025        let manifest = make_manifest();
4026        store
4027            .replace_index(IndexData {
4028                manifest: &manifest,
4029                files: std::slice::from_ref(&file),
4030                symbols: &[symbol.clone(), unmapped_symbol.clone()],
4031                chunks: &[],
4032                tests: &[],
4033                imports: &[],
4034                occurrences: &[],
4035                analysis_facts: &[],
4036            })
4037            .unwrap();
4038        store.put_history_snapshot(&history_snapshot()).unwrap();
4039
4040        let file_provenance = store
4041            .provenance_for_path(std::path::Path::new("src/lib.rs"), 10)
4042            .unwrap();
4043        assert_eq!(
4044            file_provenance
4045                .first_seen
4046                .as_ref()
4047                .map(|touch| touch.commit.id.0.as_str()),
4048            Some("older")
4049        );
4050        assert_eq!(
4051            file_provenance
4052                .last_touched
4053                .as_ref()
4054                .map(|touch| touch.commit.id.0.as_str()),
4055            Some("newer")
4056        );
4057        assert_eq!(file_provenance.recent_touches.len(), 2);
4058        assert_eq!(file_provenance.confidence, Confidence::Exact);
4059
4060        let symbol_provenance = store.provenance_for_symbol(&symbol.id, 10).unwrap();
4061        assert_eq!(symbol_provenance.recent_touches.len(), 1);
4062        assert_eq!(symbol_provenance.confidence, Confidence::Medium);
4063        assert_eq!(
4064            symbol_provenance.recent_touches[0].commit.author.name,
4065            "Newer Author"
4066        );
4067        assert_eq!(
4068            symbol_provenance.recent_touches[0].line_ranges,
4069            vec![LineRange { start: 4, end: 8 }]
4070        );
4071        assert!(symbol_provenance
4072            .uncertainty
4073            .iter()
4074            .any(|note| note.contains("earliest line-mapped touch")));
4075
4076        let unmapped = store
4077            .provenance_for_symbol(&unmapped_symbol.id, 10)
4078            .unwrap();
4079        assert!(unmapped.first_seen.is_none());
4080        assert!(unmapped.last_touched.is_none());
4081        assert!(unmapped.recent_touches.is_empty());
4082        assert_eq!(unmapped.confidence, Confidence::Low);
4083        assert!(unmapped
4084            .uncertainty
4085            .iter()
4086            .any(|note| note.contains("no persisted line-level commit mapping")));
4087        assert!(unmapped
4088            .uncertainty
4089            .iter()
4090            .any(|note| note.contains("has no line range")));
4091    }
4092
4093    #[test]
4094    fn path_provenance_follows_rename_aliases_in_both_directions() {
4095        let store = make_store();
4096        let mut snapshot = history_snapshot();
4097        snapshot.file_touches[0].path = "src/old.rs".into();
4098        snapshot.file_touches[1].previous_path = Some("src/old.rs".into());
4099        snapshot.file_touches[1].change_kind = GitChangeKind::Renamed;
4100        store.put_history_snapshot(&snapshot).unwrap();
4101
4102        let current = store
4103            .provenance_for_path(std::path::Path::new("src/lib.rs"), 10)
4104            .unwrap();
4105        let historical = store
4106            .provenance_for_path(std::path::Path::new("src/old.rs"), 10)
4107            .unwrap();
4108
4109        assert_eq!(current.recent_touches.len(), 2);
4110        assert_eq!(historical.recent_touches.len(), 2);
4111        assert_eq!(
4112            current
4113                .first_seen
4114                .as_ref()
4115                .map(|touch| touch.path.as_path()),
4116            Some(std::path::Path::new("src/old.rs"))
4117        );
4118    }
4119
4120    #[test]
4121    fn invalid_snapshot_does_not_replace_existing_history() {
4122        let store = make_store();
4123        let snapshot = history_snapshot();
4124        store.put_history_snapshot(&snapshot).unwrap();
4125
4126        let mut invalid = snapshot;
4127        invalid.file_touches[0].commit_id = GitCommitId::new("missing");
4128        let error = store
4129            .put_history_snapshot(&invalid)
4130            .unwrap_err()
4131            .to_string();
4132        assert!(error.contains("references missing commit `missing`"));
4133
4134        let recent = store.recent_commits(10).unwrap();
4135        assert_eq!(recent.len(), 2);
4136        assert_eq!(recent[0].id.0, "newer");
4137
4138        store
4139            .put_history_snapshot(&HistorySnapshot::empty())
4140            .unwrap();
4141        assert!(store.recent_commits(10).unwrap().is_empty());
4142    }
4143
4144    #[test]
4145    fn replace_index_and_list_files() {
4146        let store = make_store();
4147        let file1 = make_file("f1", "src/main.rs");
4148        let file2 = make_file("f2", "src/lib.rs");
4149        let sym1 = make_symbol("s1", "main_fn", "f1");
4150
4151        let manifest = make_manifest();
4152        let files = vec![file1.clone(), file2.clone()];
4153        let symbols = vec![sym1.clone()];
4154
4155        let data = IndexData {
4156            manifest: &manifest,
4157            files: &files,
4158            symbols: &symbols,
4159            occurrences: &[],
4160            chunks: &[],
4161            imports: &[],
4162            tests: &[],
4163            analysis_facts: &[],
4164        };
4165        store.replace_index(data).unwrap();
4166
4167        let files_list = store.list_files(100, 0).unwrap();
4168        assert_eq!(files_list.len(), 2);
4169
4170        let by_path = store
4171            .get_file_by_path(&std::path::PathBuf::from("src/main.rs"))
4172            .unwrap();
4173        assert!(by_path.is_some());
4174        assert_eq!(by_path.unwrap().id, file1.id);
4175    }
4176
4177    #[test]
4178    fn partial_replace_updates_changed_files_and_cleans_deleted_graph_edges() {
4179        let store = make_store();
4180        let manifest = make_manifest();
4181        let file1 = make_file("f1", "src/main.rs");
4182        let file2 = make_file("f2", "src/lib.rs");
4183        let sym1 = make_symbol("s1", "main_fn", "f1");
4184        let sym2 = make_symbol("s2", "lib_fn", "f2");
4185        let old_chunk = CodeChunk {
4186            id: "c1".into(),
4187            file_id: file1.id.clone(),
4188            range: LineRange { start: 1, end: 1 },
4189            language: Language::Rust,
4190            text: "fn main_fn() {}".into(),
4191            symbol_id: Some(sym1.id.clone()),
4192        };
4193        store
4194            .replace_index(IndexData {
4195                manifest: &manifest,
4196                files: &[file1.clone(), file2.clone()],
4197                symbols: &[sym1.clone(), sym2.clone()],
4198                chunks: std::slice::from_ref(&old_chunk),
4199                tests: &[],
4200                imports: &[],
4201                occurrences: &[SymbolOccurrence {
4202                    symbol_id: sym1.id.clone(),
4203                    file_id: file1.id.clone(),
4204                    range: Some(LineRange::single(1)),
4205                    is_definition: true,
4206                    confidence: Confidence::Exact,
4207                    provenance: EvidenceSourceType::StaticAnalysis,
4208                }],
4209                analysis_facts: &[],
4210            })
4211            .unwrap();
4212        let node1 = GraphNode {
4213            id: NodeId::new("symbol:s1"),
4214            node_type: GraphNodeType::Function,
4215            label: "main_fn".into(),
4216            file_id: Some(file1.id.clone()),
4217            symbol_id: Some(sym1.id.clone()),
4218            ..Default::default()
4219        };
4220        let node2 = GraphNode {
4221            id: NodeId::new("symbol:s2"),
4222            node_type: GraphNodeType::Function,
4223            label: "lib_fn".into(),
4224            file_id: Some(file2.id.clone()),
4225            symbol_id: Some(sym2.id.clone()),
4226            ..Default::default()
4227        };
4228        let edge = GraphEdge {
4229            id: EdgeId::new("edge:s1-s2"),
4230            from: node1.id.clone(),
4231            to: node2.id.clone(),
4232            edge_type: GraphEdgeType::References,
4233            evidence: evidence(),
4234            ..Default::default()
4235        };
4236        let node3 = GraphNode {
4237            id: NodeId::new("external:a"),
4238            node_type: GraphNodeType::Module,
4239            label: "external a".into(),
4240            ..Default::default()
4241        };
4242        let node4 = GraphNode {
4243            id: NodeId::new("external:b"),
4244            node_type: GraphNodeType::Module,
4245            label: "external b".into(),
4246            ..Default::default()
4247        };
4248        let mut source_evidence = evidence();
4249        source_evidence.source = "src/main.rs".into();
4250        let source_edge = GraphEdge {
4251            id: EdgeId::new("edge:source-file"),
4252            from: node3.id.clone(),
4253            to: node4.id.clone(),
4254            edge_type: GraphEdgeType::RelatedToTicket,
4255            evidence: source_evidence,
4256            ..Default::default()
4257        };
4258        store
4259            .replace_graph(
4260                &[node1, node2.clone(), node3.clone(), node4.clone()],
4261                &[edge.clone(), source_edge],
4262            )
4263            .unwrap();
4264
4265        let mut updated_file2 = file2.clone();
4266        updated_file2.content_hash = "new-hash".into();
4267        let updated_sym2 = make_symbol("s2b", "lib_fn_new", "f2");
4268        let updated_chunk = CodeChunk {
4269            id: "c2".into(),
4270            file_id: updated_file2.id.clone(),
4271            range: LineRange { start: 2, end: 2 },
4272            language: Language::Rust,
4273            text: "fn lib_fn_new() {}".into(),
4274            symbol_id: Some(updated_sym2.id.clone()),
4275        };
4276        let updated_node2 = GraphNode {
4277            id: NodeId::new("symbol:s2b"),
4278            node_type: GraphNodeType::Function,
4279            label: "lib_fn_new".into(),
4280            file_id: Some(updated_file2.id.clone()),
4281            symbol_id: Some(updated_sym2.id.clone()),
4282            ..Default::default()
4283        };
4284        store
4285            .replace_files_index(PartialIndexUpdate {
4286                manifest: &manifest,
4287                changed_files: std::slice::from_ref(&updated_file2),
4288                deleted_file_ids: std::slice::from_ref(&file1.id),
4289                symbols: std::slice::from_ref(&updated_sym2),
4290                chunks: std::slice::from_ref(&updated_chunk),
4291                tests: &[],
4292                imports: &[],
4293                occurrences: &[],
4294                analysis_facts: &[],
4295                graph_nodes: std::slice::from_ref(&updated_node2),
4296                graph_edges: &[],
4297            })
4298            .unwrap();
4299
4300        assert!(store
4301            .get_file_by_path(std::path::Path::new("src/main.rs"))
4302            .unwrap()
4303            .is_none());
4304        assert_eq!(
4305            store
4306                .get_file_by_path(std::path::Path::new("src/lib.rs"))
4307                .unwrap()
4308                .unwrap()
4309                .content_hash,
4310            "new-hash"
4311        );
4312        assert!(store.symbol_by_id(&sym1.id).unwrap().is_none());
4313        assert!(store.symbol_by_id(&updated_sym2.id).unwrap().is_some());
4314        assert!(store.chunks_for_file(&file1.id).unwrap().is_empty());
4315        assert_eq!(store.chunks_for_file(&file2.id).unwrap()[0].id, "c2");
4316        let edge_count: i64 = store
4317            .connection
4318            .lock()
4319            .unwrap()
4320            .query_row("SELECT COUNT(*) FROM graph_edges", [], |row| row.get(0))
4321            .unwrap();
4322        assert_eq!(edge_count, 0);
4323        assert!(store.node_by_id("symbol:s1").unwrap().is_none());
4324        assert!(store.node_by_id("symbol:s2b").unwrap().is_some());
4325    }
4326
4327    #[test]
4328    fn partial_replace_rolls_back_on_insert_failure() {
4329        let store = make_store();
4330        let manifest = make_manifest();
4331        let file = make_file("f1", "src/lib.rs");
4332        store
4333            .replace_index(IndexData {
4334                manifest: &manifest,
4335                files: std::slice::from_ref(&file),
4336                symbols: &[],
4337                chunks: &[],
4338                tests: &[],
4339                imports: &[],
4340                occurrences: &[],
4341                analysis_facts: &[],
4342            })
4343            .unwrap();
4344
4345        let duplicate_a = make_file("f2", "src/dup.rs");
4346        let mut duplicate_b = make_file("f3", "src/dup.rs");
4347        duplicate_b.content_hash = "other".into();
4348        let error = store
4349            .replace_files_index(PartialIndexUpdate {
4350                manifest: &manifest,
4351                changed_files: &[duplicate_a, duplicate_b],
4352                deleted_file_ids: std::slice::from_ref(&file.id),
4353                symbols: &[],
4354                chunks: &[],
4355                tests: &[],
4356                imports: &[],
4357                occurrences: &[],
4358                analysis_facts: &[],
4359                graph_nodes: &[],
4360                graph_edges: &[],
4361            })
4362            .unwrap_err()
4363            .to_string();
4364        assert!(error.contains("UNIQUE") || error.contains("constraint"));
4365        assert!(store
4366            .get_file_by_path(std::path::Path::new("src/lib.rs"))
4367            .unwrap()
4368            .is_some());
4369        assert!(store
4370            .get_file_by_path(std::path::Path::new("src/dup.rs"))
4371            .unwrap()
4372            .is_none());
4373    }
4374
4375    #[test]
4376    fn replace_index_persists_analysis_facts() {
4377        let store = make_store();
4378        let file = make_file("f1", "src/handler.rs");
4379        let manifest = make_manifest();
4380        let runtime_fact = AnalysisFact {
4381            id: "runtime-1".into(),
4382            file_id: file.id.clone(),
4383            symbol_id: None,
4384            target: "GET /api/orders".into(),
4385            target_kind: GraphNodeType::Endpoint,
4386            edge_type: GraphEdgeType::ExposesEndpoint,
4387            range: Some(LineRange::single(12)),
4388            confidence: Confidence::High,
4389            source: "open-kioku-runtime:.ok/runtime/spans.jsonl".into(),
4390            source_type: EvidenceSourceType::Runtime,
4391            message: "runtime endpoint observed in local trace artifact".into(),
4392        };
4393        let static_fact = AnalysisFact {
4394            id: "static-1".into(),
4395            file_id: file.id.clone(),
4396            symbol_id: None,
4397            target: "orders".into(),
4398            target_kind: GraphNodeType::DatabaseTable,
4399            edge_type: GraphEdgeType::ReadsTable,
4400            range: None,
4401            confidence: Confidence::Medium,
4402            source: "open-kioku-static".into(),
4403            source_type: EvidenceSourceType::StaticAnalysis,
4404            message: "static fact".into(),
4405        };
4406        let git_fact = AnalysisFact {
4407            id: "git-1".into(),
4408            file_id: file.id.clone(),
4409            symbol_id: None,
4410            target: "tests/handler_test.rs".into(),
4411            target_kind: GraphNodeType::Test,
4412            edge_type: GraphEdgeType::ChangedBy,
4413            range: None,
4414            confidence: Confidence::High,
4415            source: "git-history:abc123".into(),
4416            source_type: EvidenceSourceType::GitHistory,
4417            message: "git co-change observed in 1 commit(s), recency weight 1.00".into(),
4418        };
4419
4420        store
4421            .replace_index(IndexData {
4422                manifest: &manifest,
4423                files: &[file],
4424                symbols: &[],
4425                occurrences: &[],
4426                chunks: &[],
4427                imports: &[],
4428                tests: &[],
4429                analysis_facts: &[runtime_fact.clone(), static_fact, git_fact.clone()],
4430            })
4431            .unwrap();
4432
4433        let runtime = store
4434            .analysis_facts(Some(EvidenceSourceType::Runtime), 10)
4435            .unwrap();
4436        assert_eq!(runtime.len(), 1);
4437        assert_eq!(runtime[0].id, runtime_fact.id);
4438        assert_eq!(runtime[0].target, runtime_fact.target);
4439        let git = store
4440            .analysis_facts(Some(EvidenceSourceType::GitHistory), 10)
4441            .unwrap();
4442        assert_eq!(git.len(), 1);
4443        assert_eq!(git[0].id, git_fact.id);
4444        assert_eq!(git[0].target, git_fact.target);
4445        let all = store.analysis_facts(None, 10).unwrap();
4446        assert_eq!(all.len(), 3);
4447    }
4448
4449    #[test]
4450    fn replace_index_preserves_typed_and_legacy_history() {
4451        let store = make_store();
4452        store.put_history_snapshot(&history_snapshot()).unwrap();
4453
4454        let file = make_file("f1", "src/lib.rs");
4455        let manifest = make_manifest();
4456        let git_fact = AnalysisFact {
4457            id: "legacy-git-1".into(),
4458            file_id: file.id.clone(),
4459            symbol_id: None,
4460            target: "tests/lib_test.rs".into(),
4461            target_kind: GraphNodeType::Test,
4462            edge_type: GraphEdgeType::ChangedBy,
4463            range: None,
4464            confidence: Confidence::High,
4465            source: "git-history:newer".into(),
4466            source_type: EvidenceSourceType::GitHistory,
4467            message: "legacy co-change compatibility fact".into(),
4468        };
4469
4470        for _ in 0..2 {
4471            store
4472                .replace_index(IndexData {
4473                    manifest: &manifest,
4474                    files: std::slice::from_ref(&file),
4475                    symbols: &[],
4476                    occurrences: &[],
4477                    chunks: &[],
4478                    imports: &[],
4479                    tests: &[],
4480                    analysis_facts: std::slice::from_ref(&git_fact),
4481                })
4482                .unwrap();
4483        }
4484
4485        assert_eq!(store.recent_commits(10).unwrap().len(), 2);
4486        let summary = store
4487            .history_for_file(std::path::Path::new("src/lib.rs"), 10)
4488            .unwrap();
4489        assert_eq!(summary.file_touches.len(), 2);
4490        let legacy = store
4491            .analysis_facts(Some(EvidenceSourceType::GitHistory), 10)
4492            .unwrap();
4493        assert_eq!(legacy.len(), 1);
4494        assert_eq!(legacy[0].id, git_fact.id);
4495    }
4496
4497    #[test]
4498    fn list_symbols_with_filter() {
4499        let store = make_store();
4500        let file = make_file("f1", "src/lib.rs");
4501        let sym_a = make_symbol("s1", "alpha_handler", "f1");
4502        let sym_b = make_symbol("s2", "beta_worker", "f1");
4503        let manifest = make_manifest();
4504        let files = vec![file];
4505        let symbols = vec![sym_a, sym_b];
4506        let data = IndexData {
4507            manifest: &manifest,
4508            files: &files,
4509            symbols: &symbols,
4510            occurrences: &[],
4511            chunks: &[],
4512            imports: &[],
4513            tests: &[],
4514            analysis_facts: &[],
4515        };
4516        store.replace_index(data).unwrap();
4517
4518        let all = store.list_symbols(None, 100, 0).unwrap();
4519        assert_eq!(all.len(), 2);
4520
4521        let filtered = store.list_symbols(Some("alpha"), 10, 0).unwrap();
4522        assert_eq!(filtered.len(), 1);
4523        assert_eq!(filtered[0].name, "alpha_handler");
4524    }
4525
4526    #[test]
4527    fn replace_graph_and_neighbors() {
4528        let store = make_store();
4529        // First we need an index so that the graph tables exist.
4530        let file = make_file("f1", "src/lib.rs");
4531        let manifest = make_manifest();
4532        let files = vec![file];
4533        let data = IndexData {
4534            manifest: &manifest,
4535            files: &files,
4536            symbols: &[],
4537            occurrences: &[],
4538            chunks: &[],
4539            imports: &[],
4540            tests: &[],
4541            analysis_facts: &[],
4542        };
4543        store.replace_index(data).unwrap();
4544
4545        let node_a = GraphNode {
4546            id: NodeId::new("file:src/lib.rs"),
4547            node_type: GraphNodeType::File,
4548            label: "src/lib.rs".into(),
4549            file_id: Some(FileId::new("f1")),
4550            symbol_id: None,
4551            ..Default::default()
4552        };
4553        let node_b = GraphNode {
4554            id: NodeId::new("symbol:s1"),
4555            node_type: GraphNodeType::Function,
4556            label: "worker".into(),
4557            file_id: Some(FileId::new("f1")),
4558            symbol_id: Some(SymbolId::new("s1")),
4559            ..Default::default()
4560        };
4561        let edge = GraphEdge {
4562            id: EdgeId::new("e1"),
4563            from: node_a.id.clone(),
4564            to: node_b.id.clone(),
4565            edge_type: GraphEdgeType::Defines,
4566            evidence: evidence(),
4567            ..Default::default()
4568        };
4569
4570        store
4571            .replace_graph(
4572                &[node_a.clone(), node_b.clone()],
4573                std::slice::from_ref(&edge),
4574            )
4575            .unwrap();
4576
4577        let (nodes, edges) = store.neighbors("file:src/lib.rs", 10).unwrap();
4578        assert_eq!(edges.len(), 1);
4579        assert_eq!(edges[0].id.0, "e1");
4580        assert!(nodes.iter().any(|n| n.id == node_a.id));
4581    }
4582
4583    #[test]
4584    fn graph_facts_with_properties_and_confidence_metadata_round_trip() {
4585        let store = make_store();
4586        let file = make_file("f1", "src/lib.rs");
4587        let manifest = make_manifest();
4588        let files = vec![file];
4589        let data = IndexData {
4590            manifest: &manifest,
4591            files: &files,
4592            symbols: &[],
4593            occurrences: &[],
4594            chunks: &[],
4595            imports: &[],
4596            tests: &[],
4597            analysis_facts: &[],
4598        };
4599        store.replace_index(data).unwrap();
4600
4601        let node_a = GraphNode {
4602            id: NodeId::new("file:src/lib.rs"),
4603            node_type: GraphNodeType::File,
4604            label: "src/lib.rs".into(),
4605            file_id: Some(FileId::new("f1")),
4606            properties: BTreeMap::from([("package".into(), serde_json::json!("open-kioku"))]),
4607            schema_version: Some("graph-v1".into()),
4608            source_pass: Some("tree_sitter".into()),
4609            index_mode: Some("full".into()),
4610            extractor_version: Some("test-extractor".into()),
4611            ambiguity: vec!["generated file status unknown".into()],
4612            quality_notes: vec!["file path verified".into()],
4613            ..Default::default()
4614        };
4615        let node_b = GraphNode {
4616            id: NodeId::new("symbol:s1"),
4617            node_type: GraphNodeType::Function,
4618            label: "worker".into(),
4619            file_id: Some(FileId::new("f1")),
4620            symbol_id: Some(SymbolId::new("s1")),
4621            ..Default::default()
4622        };
4623        let mut edge_evidence = evidence();
4624        edge_evidence.confidence_score = Some(0.98);
4625        edge_evidence.confidence_reason = Some("exact symbol occurrence".into());
4626        edge_evidence.freshness = Some("fresh".into());
4627        let edge = GraphEdge {
4628            id: EdgeId::new("e1"),
4629            from: node_a.id.clone(),
4630            to: node_b.id.clone(),
4631            edge_type: GraphEdgeType::Defines,
4632            evidence: edge_evidence,
4633            properties: BTreeMap::from([("relation".into(), serde_json::json!("definition"))]),
4634            schema_version: Some("graph-v1".into()),
4635            source_pass: Some("scip".into()),
4636            index_mode: Some("full".into()),
4637            extractor_version: Some("test-scip".into()),
4638            ambiguity: vec!["macro expansion not modeled".into()],
4639            quality_notes: vec!["exact definition edge".into()],
4640        };
4641
4642        store
4643            .replace_graph(
4644                &[node_a.clone(), node_b.clone()],
4645                std::slice::from_ref(&edge),
4646            )
4647            .unwrap();
4648
4649        let (nodes, edges) = store.neighbors("file:src/lib.rs", 10).unwrap();
4650        let stored_node = nodes.iter().find(|node| node.id == node_a.id).unwrap();
4651        assert_eq!(stored_node.properties, node_a.properties);
4652        assert_eq!(stored_node.schema_version.as_deref(), Some("graph-v1"));
4653        assert_eq!(stored_node.source_pass.as_deref(), Some("tree_sitter"));
4654        assert_eq!(stored_node.quality_notes, vec!["file path verified"]);
4655
4656        assert_eq!(edges.len(), 1);
4657        let stored_edge = &edges[0];
4658        assert_eq!(stored_edge.properties, edge.properties);
4659        assert_eq!(stored_edge.schema_version.as_deref(), Some("graph-v1"));
4660        assert_eq!(stored_edge.evidence.confidence_score, Some(0.98));
4661        assert_eq!(
4662            stored_edge.evidence.confidence_reason.as_deref(),
4663            Some("exact symbol occurrence")
4664        );
4665        assert_eq!(stored_edge.evidence.freshness.as_deref(), Some("fresh"));
4666
4667        let indexed_confidence: String = store
4668            .connection
4669            .lock()
4670            .unwrap()
4671            .query_row(
4672                "SELECT confidence FROM graph_edges WHERE id = 'e1'",
4673                [],
4674                |row| row.get(0),
4675            )
4676            .unwrap();
4677        assert_eq!(indexed_confidence, "Medium");
4678    }
4679
4680    #[test]
4681    fn shortest_path_finds_direct_route() {
4682        let store = make_store();
4683        let file = make_file("f1", "src/lib.rs");
4684        let manifest = make_manifest();
4685        let files = vec![file];
4686        let data = IndexData {
4687            manifest: &manifest,
4688            files: &files,
4689            symbols: &[],
4690            occurrences: &[],
4691            chunks: &[],
4692            imports: &[],
4693            tests: &[],
4694            analysis_facts: &[],
4695        };
4696        store.replace_index(data).unwrap();
4697
4698        let node_a = GraphNode {
4699            id: NodeId::new("a"),
4700            node_type: GraphNodeType::File,
4701            label: "a".into(),
4702            file_id: None,
4703            symbol_id: None,
4704            ..Default::default()
4705        };
4706        let node_b = GraphNode {
4707            id: NodeId::new("b"),
4708            node_type: GraphNodeType::File,
4709            label: "b".into(),
4710            file_id: None,
4711            symbol_id: None,
4712            ..Default::default()
4713        };
4714        let edge = GraphEdge {
4715            id: EdgeId::new("a-b"),
4716            from: node_a.id.clone(),
4717            to: node_b.id.clone(),
4718            edge_type: GraphEdgeType::Defines,
4719            evidence: evidence(),
4720            ..Default::default()
4721        };
4722        store.replace_graph(&[node_a, node_b], &[edge]).unwrap();
4723
4724        let path = store.shortest_path("a", "b", 5).unwrap();
4725        assert_eq!(path.len(), 1);
4726        assert_eq!(path[0].id.0, "a-b");
4727    }
4728
4729    #[test]
4730    fn shortest_path_returns_empty_when_no_route() {
4731        let store = make_store();
4732        let file = make_file("f1", "src/lib.rs");
4733        let manifest = make_manifest();
4734        let files = vec![file];
4735        let data = IndexData {
4736            manifest: &manifest,
4737            files: &files,
4738            symbols: &[],
4739            occurrences: &[],
4740            chunks: &[],
4741            imports: &[],
4742            tests: &[],
4743            analysis_facts: &[],
4744        };
4745        store.replace_index(data).unwrap();
4746        store.replace_graph(&[], &[]).unwrap();
4747
4748        let path = store.shortest_path("x", "y", 5).unwrap();
4749        assert!(path.is_empty());
4750    }
4751
4752    #[test]
4753    fn test_old_graph_tables_migrate_and_replace_graph_backfills_columns() {
4754        let store = make_store();
4755        let legacy_file = GraphNode {
4756            id: NodeId::new("legacy_file"),
4757            node_type: GraphNodeType::File,
4758            label: "legacy.rs".into(),
4759            file_id: Some(FileId::new("f1")),
4760            ..Default::default()
4761        };
4762        let legacy_symbol = GraphNode {
4763            id: NodeId::new("legacy_symbol"),
4764            node_type: GraphNodeType::Function,
4765            label: "legacy_fn".into(),
4766            symbol_id: Some(SymbolId::new("s1")),
4767            ..Default::default()
4768        };
4769        let mut legacy_evidence = evidence();
4770        legacy_evidence.source_type = EvidenceSourceType::Scip;
4771        legacy_evidence.source = "index.scip".into();
4772        let legacy_edge = GraphEdge {
4773            id: EdgeId::new("legacy_edge"),
4774            from: legacy_file.id.clone(),
4775            to: legacy_symbol.id.clone(),
4776            edge_type: GraphEdgeType::Defines,
4777            evidence: legacy_evidence,
4778            ..Default::default()
4779        };
4780        {
4781            let conn = store.connection.lock().unwrap();
4782            conn.execute("DROP TABLE graph_nodes", []).unwrap();
4783            conn.execute("DROP TABLE graph_edges", []).unwrap();
4784            conn.execute(
4785                "CREATE TABLE graph_nodes(id TEXT PRIMARY KEY, label TEXT, json TEXT)",
4786                [],
4787            )
4788            .unwrap();
4789            conn.execute("CREATE TABLE graph_edges(id TEXT PRIMARY KEY, from_id TEXT, to_id TEXT, edge_type TEXT, json TEXT)", []).unwrap();
4790            conn.execute(
4791                "INSERT INTO graph_nodes(id, label, json) VALUES(?1, ?2, ?3)",
4792                params![
4793                    legacy_file.id.0.as_str(),
4794                    legacy_file.label.as_str(),
4795                    serde_json::to_string(&legacy_file).unwrap(),
4796                ],
4797            )
4798            .unwrap();
4799            conn.execute(
4800                "INSERT INTO graph_nodes(id, label, json) VALUES(?1, ?2, ?3)",
4801                params![
4802                    legacy_symbol.id.0.as_str(),
4803                    legacy_symbol.label.as_str(),
4804                    serde_json::to_string(&legacy_symbol).unwrap(),
4805                ],
4806            )
4807            .unwrap();
4808            conn.execute(
4809                "INSERT INTO graph_edges(id, from_id, to_id, edge_type, json)
4810                 VALUES(?1, ?2, ?3, '', ?4)",
4811                params![
4812                    legacy_edge.id.0.as_str(),
4813                    legacy_edge.from.0.as_str(),
4814                    legacy_edge.to.0.as_str(),
4815                    serde_json::to_string(&legacy_edge).unwrap(),
4816                ],
4817            )
4818            .unwrap();
4819        }
4820        store.initialize().unwrap();
4821        store.initialize().unwrap();
4822
4823        let migrated_nodes = store.nodes_by_type(GraphNodeType::File, 10, 0).unwrap();
4824        assert_eq!(migrated_nodes.len(), 1);
4825        assert_eq!(migrated_nodes[0].id.0, "legacy_file");
4826
4827        let migrated_edges = store.edges_by_type(GraphEdgeType::Defines, 10, 0).unwrap();
4828        assert_eq!(migrated_edges.len(), 1);
4829        assert_eq!(migrated_edges[0].id.0, "legacy_edge");
4830        let migrated_between = store
4831            .graph_edges_between("legacy_file", "legacy_symbol", 10)
4832            .unwrap();
4833        assert_eq!(migrated_between.len(), 1);
4834
4835        let migrated_counts = store.graph_schema_counts().unwrap();
4836        assert_eq!(migrated_counts.node_types.get("File"), Some(&1));
4837        assert_eq!(migrated_counts.edge_types.get("Defines"), Some(&1));
4838
4839        let node = GraphNode {
4840            id: NodeId::new("test_node"),
4841            node_type: GraphNodeType::File,
4842            label: "test".into(),
4843            ..Default::default()
4844        };
4845        store.replace_graph(&[node], &[]).unwrap();
4846
4847        let count: i64 = store
4848            .connection
4849            .lock()
4850            .unwrap()
4851            .query_row(
4852                "SELECT COUNT(*) FROM graph_nodes WHERE node_type = 'File'",
4853                [],
4854                |r| r.get(0),
4855            )
4856            .unwrap();
4857        assert_eq!(count, 1);
4858
4859        let version: i64 = store
4860            .connection
4861            .lock()
4862            .unwrap()
4863            .pragma_query_value(None, "user_version", |row| row.get(0))
4864            .unwrap();
4865        assert_eq!(version, SQLITE_GRAPH_SCHEMA_VERSION);
4866
4867        let index_count: i64 = store
4868            .connection
4869            .lock()
4870            .unwrap()
4871            .query_row(
4872                "SELECT COUNT(*) FROM sqlite_master
4873                 WHERE type = 'index'
4874                   AND name IN (
4875                     'idx_graph_nodes_type',
4876                     'idx_graph_nodes_file',
4877                     'idx_graph_nodes_symbol',
4878                     'idx_graph_edges_type',
4879                     'idx_graph_edges_from_type',
4880                     'idx_graph_edges_to_type',
4881                     'idx_graph_edges_source_type'
4882                   )",
4883                [],
4884                |row| row.get(0),
4885            )
4886            .unwrap();
4887        assert_eq!(index_count, 7);
4888    }
4889
4890    #[test]
4891    fn test_nodes_by_type_uses_indexed_column() {
4892        let store = make_store();
4893        let node1 = GraphNode {
4894            id: NodeId::new("n1"),
4895            node_type: GraphNodeType::File,
4896            ..Default::default()
4897        };
4898        let node2 = GraphNode {
4899            id: NodeId::new("n2"),
4900            node_type: GraphNodeType::File,
4901            ..Default::default()
4902        };
4903        let node3 = GraphNode {
4904            id: NodeId::new("n3"),
4905            node_type: GraphNodeType::Function,
4906            ..Default::default()
4907        };
4908        store
4909            .replace_graph(&[node2.clone(), node3.clone(), node1.clone()], &[])
4910            .unwrap();
4911
4912        let nodes = store.nodes_by_type(GraphNodeType::File, 10, 0).unwrap();
4913        assert_eq!(nodes.len(), 2);
4914        assert_eq!(nodes[0].id.0, "n1");
4915        assert_eq!(nodes[1].id.0, "n2");
4916    }
4917
4918    #[test]
4919    fn test_edges_by_type_uses_indexed_column() {
4920        let store = make_store();
4921        let node1 = GraphNode {
4922            id: NodeId::new("n1"),
4923            ..Default::default()
4924        };
4925        let node2 = GraphNode {
4926            id: NodeId::new("n2"),
4927            ..Default::default()
4928        };
4929        let edge1 = GraphEdge {
4930            id: EdgeId::new("e1"),
4931            from: NodeId::new("n1"),
4932            to: NodeId::new("n2"),
4933            edge_type: GraphEdgeType::Calls,
4934            ..Default::default()
4935        };
4936        let edge2 = GraphEdge {
4937            id: EdgeId::new("e2"),
4938            from: NodeId::new("n1"),
4939            to: NodeId::new("n2"),
4940            edge_type: GraphEdgeType::Calls,
4941            ..Default::default()
4942        };
4943        let edge3 = GraphEdge {
4944            id: EdgeId::new("e3"),
4945            from: NodeId::new("n1"),
4946            to: NodeId::new("n2"),
4947            edge_type: GraphEdgeType::Defines,
4948            ..Default::default()
4949        };
4950        store
4951            .replace_graph(
4952                &[node1, node2],
4953                &[edge2.clone(), edge3.clone(), edge1.clone()],
4954            )
4955            .unwrap();
4956
4957        let edges = store.edges_by_type(GraphEdgeType::Calls, 10, 0).unwrap();
4958        assert_eq!(edges.len(), 2);
4959        assert_eq!(edges[0].id.0, "e1");
4960        assert_eq!(edges[1].id.0, "e2");
4961    }
4962
4963    #[test]
4964    fn test_graph_edges_between_respects_limit() {
4965        let store = make_store();
4966        let node1 = GraphNode {
4967            id: NodeId::new("n1"),
4968            ..Default::default()
4969        };
4970        let node2 = GraphNode {
4971            id: NodeId::new("n2"),
4972            ..Default::default()
4973        };
4974        let edge1 = GraphEdge {
4975            id: EdgeId::new("e1"),
4976            from: NodeId::new("n1"),
4977            to: NodeId::new("n2"),
4978            ..Default::default()
4979        };
4980        let edge2 = GraphEdge {
4981            id: EdgeId::new("e2"),
4982            from: NodeId::new("n1"),
4983            to: NodeId::new("n2"),
4984            ..Default::default()
4985        };
4986        store
4987            .replace_graph(&[node1, node2], &[edge2.clone(), edge1.clone()])
4988            .unwrap();
4989
4990        let edges = store.graph_edges_between("n1", "n2", 1).unwrap();
4991        assert_eq!(edges.len(), 1);
4992        assert_eq!(edges[0].id.0, "e1");
4993    }
4994
4995    #[test]
4996    fn test_query_limit_is_capped() {
4997        assert_eq!(super::clamp_limit(0), 100);
4998        assert_eq!(super::clamp_limit(5), 5);
4999        assert_eq!(super::clamp_limit(5000), 1000);
5000    }
5001
5002    #[test]
5003    fn test_graph_schema_counts_returns_sorted_type_counts() {
5004        let store = make_store();
5005        let node1 = GraphNode {
5006            id: NodeId::new("n1"),
5007            node_type: GraphNodeType::File,
5008            ..Default::default()
5009        };
5010        let node2 = GraphNode {
5011            id: NodeId::new("n2"),
5012            node_type: GraphNodeType::File,
5013            ..Default::default()
5014        };
5015        let node3 = GraphNode {
5016            id: NodeId::new("n3"),
5017            node_type: GraphNodeType::Function,
5018            ..Default::default()
5019        };
5020        let edge1 = GraphEdge {
5021            id: EdgeId::new("e1"),
5022            from: NodeId::new("n1"),
5023            to: NodeId::new("n2"),
5024            edge_type: GraphEdgeType::Calls,
5025            ..Default::default()
5026        };
5027        store
5028            .replace_graph(&[node1, node2, node3], &[edge1])
5029            .unwrap();
5030
5031        let counts = store.graph_schema_counts().unwrap();
5032        assert_eq!(counts.node_types.get("File"), Some(&2));
5033        assert_eq!(counts.node_types.get("Function"), Some(&1));
5034        assert_eq!(counts.edge_types.get("Calls"), Some(&1));
5035    }
5036
5037    #[test]
5038    fn test_graph_counts_returns_total_nodes_and_edges() {
5039        let store = make_store();
5040        let node1 = GraphNode {
5041            id: NodeId::new("n1"),
5042            node_type: GraphNodeType::File,
5043            ..Default::default()
5044        };
5045        let node2 = GraphNode {
5046            id: NodeId::new("n2"),
5047            node_type: GraphNodeType::File,
5048            ..Default::default()
5049        };
5050        let edge1 = GraphEdge {
5051            id: EdgeId::new("e1"),
5052            from: NodeId::new("n1"),
5053            to: NodeId::new("n2"),
5054            edge_type: GraphEdgeType::Calls,
5055            ..Default::default()
5056        };
5057        store.replace_graph(&[node1, node2], &[edge1]).unwrap();
5058
5059        let overall = store.graph_counts().unwrap();
5060        assert_eq!(overall.nodes, 2);
5061        assert_eq!(overall.edges, 1);
5062    }
5063}