1use chrono::{DateTime, Utc};
2use open_kioku_core::{
3 AnalysisFact, ChurnEntityKind, ChurnStats, ChurnSummary, CodeChunk, Confidence,
4 EvidenceSourceType, File, FileId, FileProvenance, GitCochangeEdge, GitCommitId,
5 GitCommitRecord, GitFileTouch, GitSymbolTouch, GraphEdge, GraphEdgeType, GraphNode,
6 GraphNodeType, HistoricalChangeSummary, HistoryRecordId, HistorySnapshot, HistorySummary,
7 Import, IndexManifest, ProvenanceTouch, SimilarChangeHit, SimilarChangeQuery,
8 SimilarChangeReport, SimilarityEvidence, SimilarityEvidenceSource, Symbol, SymbolId,
9 SymbolOccurrence, SymbolProvenance, TestTarget, HISTORY_SCHEMA_VERSION,
10};
11use open_kioku_errors::{OkError, Result};
12use open_kioku_storage::{
13 GraphCounts, GraphSchemaCounts, GraphStore, HistoryStore, IndexData, MetadataStore,
14 PartialIndexUpdate,
15};
16use rusqlite::{params, Connection, OptionalExtension, Transaction};
17use std::collections::{BTreeMap, BTreeSet};
18use std::path::{Path, PathBuf};
19use std::sync::Mutex;
20
/// Version number of the SQLite git-history schema.
/// NOTE(review): how this value is consumed is outside this view — presumably by the
/// history migration; confirm.
const SQLITE_HISTORY_SCHEMA_VERSION: i64 = 1;
/// Index schema version this SQLite store supports; the only version constant that is `pub`.
pub const SQLITE_SUPPORTED_INDEX_SCHEMA_VERSION: i64 = 2;
/// Graph schema version; defined to be equal to the supported index schema version.
const SQLITE_GRAPH_SCHEMA_VERSION: i64 = SQLITE_SUPPORTED_INDEX_SCHEMA_VERSION;
/// Overall supported schema version; defined to be equal to the supported index schema version.
const SQLITE_SUPPORTED_SCHEMA_VERSION: i64 = SQLITE_SUPPORTED_INDEX_SCHEMA_VERSION;
25
/// DDL for version 1 of the git-history tables.
///
/// Creates six tables — `git_commits`, `git_file_touches`, `git_symbol_touches`,
/// `git_cochange_edges`, `git_review_events`, and `history_hotspots` — together with
/// their lookup indexes. Every statement uses `IF NOT EXISTS`. The two touch tables
/// reference `git_commits(id)` with `ON DELETE CASCADE`. Each table keeps the full
/// record in a `json` column next to the columns that are indexed for querying.
const HISTORY_SCHEMA_V1: &str = r#"
CREATE TABLE IF NOT EXISTS git_commits (
    id TEXT PRIMARY KEY,
    authored_at TEXT NOT NULL,
    committed_at TEXT NOT NULL,
    author_email TEXT,
    json TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_git_commits_committed_at
    ON git_commits(committed_at DESC, id);
CREATE INDEX IF NOT EXISTS idx_git_commits_author_email
    ON git_commits(author_email);

CREATE TABLE IF NOT EXISTS git_file_touches (
    id TEXT PRIMARY KEY,
    commit_id TEXT NOT NULL,
    path TEXT NOT NULL,
    previous_path TEXT,
    touched_at TEXT NOT NULL,
    json TEXT NOT NULL,
    FOREIGN KEY(commit_id) REFERENCES git_commits(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_git_file_touches_path
    ON git_file_touches(path, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_file_touches_previous_path
    ON git_file_touches(previous_path, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_file_touches_commit
    ON git_file_touches(commit_id);

CREATE TABLE IF NOT EXISTS git_symbol_touches (
    id TEXT PRIMARY KEY,
    commit_id TEXT NOT NULL,
    symbol_id TEXT,
    qualified_name TEXT NOT NULL,
    file_path TEXT NOT NULL,
    touched_at TEXT NOT NULL,
    json TEXT NOT NULL,
    FOREIGN KEY(commit_id) REFERENCES git_commits(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_git_symbol_touches_file
    ON git_symbol_touches(file_path, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_symbol_touches_symbol
    ON git_symbol_touches(symbol_id, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_symbol_touches_commit
    ON git_symbol_touches(commit_id);

CREATE TABLE IF NOT EXISTS git_cochange_edges (
    id TEXT PRIMARY KEY,
    path TEXT NOT NULL,
    cochanged_path TEXT NOT NULL,
    commit_count INTEGER NOT NULL,
    recency_weight REAL NOT NULL,
    last_changed_at TEXT,
    json TEXT NOT NULL,
    UNIQUE(path, cochanged_path)
);
CREATE INDEX IF NOT EXISTS idx_git_cochange_edges_path
    ON git_cochange_edges(path, recency_weight DESC, commit_count DESC);

CREATE TABLE IF NOT EXISTS git_review_events (
    id TEXT PRIMARY KEY,
    commit_id TEXT,
    path TEXT,
    reviewer_identity TEXT NOT NULL,
    observed_at TEXT NOT NULL,
    json TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_git_review_events_path
    ON git_review_events(path, observed_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_review_events_commit
    ON git_review_events(commit_id, observed_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_review_events_reviewer
    ON git_review_events(reviewer_identity, observed_at DESC);

CREATE TABLE IF NOT EXISTS history_hotspots (
    entity_kind TEXT NOT NULL,
    entity_key TEXT NOT NULL,
    path TEXT,
    symbol_id TEXT,
    qualified_name TEXT,
    hotspot_score REAL NOT NULL,
    touch_count INTEGER NOT NULL,
    generated_at TEXT NOT NULL,
    json TEXT NOT NULL,
    PRIMARY KEY(entity_kind, entity_key)
);
CREATE INDEX IF NOT EXISTS idx_history_hotspots_kind_score
    ON history_hotspots(entity_kind, hotspot_score DESC, touch_count DESC, entity_key);
CREATE INDEX IF NOT EXISTS idx_history_hotspots_path
    ON history_hotspots(path);
CREATE INDEX IF NOT EXISTS idx_history_hotspots_symbol
    ON history_hotspots(symbol_id);
"#;
119
/// Store backed by a single SQLite database file.
///
/// Implements `MetadataStore` and `HistoryStore` in this file.
pub struct SqliteStore {
    /// Path the database was opened with; returned by `path()`.
    path: PathBuf,
    /// The SQLite connection. Methods lock this mutex before issuing any statement.
    connection: Mutex<Connection>,
}
124
125impl SqliteStore {
126 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
127 let path = path.as_ref().to_path_buf();
128 if let Some(parent) = path.parent() {
129 std::fs::create_dir_all(parent)?;
130 }
131 let connection = Connection::open_with_flags(
132 &path,
133 rusqlite::OpenFlags::SQLITE_OPEN_READ_WRITE
134 | rusqlite::OpenFlags::SQLITE_OPEN_CREATE
135 | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
136 )
137 .map_err(storage_err)?;
138 let store = Self {
139 path,
140 connection: Mutex::new(connection),
141 };
142 store.initialize()?;
143 Ok(store)
144 }
145
146 pub fn path(&self) -> &Path {
147 &self.path
148 }
149
150 fn churn_by_kind_and_key<F>(
151 &self,
152 kind: ChurnEntityKind,
153 key: &str,
154 missing: F,
155 ) -> Result<ChurnSummary>
156 where
157 F: FnOnce() -> ChurnSummary,
158 {
159 let conn = self
160 .connection
161 .lock()
162 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
163 let raw = conn
164 .query_row(
165 "SELECT json FROM history_hotspots WHERE entity_kind = ?1 AND entity_key = ?2",
166 params![churn_entity_kind_key(kind), key],
167 |row| row.get::<_, String>(0),
168 )
169 .optional()
170 .map_err(storage_err)?;
171 match raw {
172 Some(raw) => Ok(serde_json::from_str(&raw)?),
173 None => Ok(missing()),
174 }
175 }
176}
177
178impl MetadataStore for SqliteStore {
179 fn initialize(&self) -> Result<()> {
180 let mut conn = self
181 .connection
182 .lock()
183 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
184 ensure_supported_sqlite_schema(&conn)?;
185 conn.execute_batch(
186 r#"
187 PRAGMA journal_mode = WAL;
188 PRAGMA foreign_keys = ON;
189 CREATE TABLE IF NOT EXISTS manifests (
190 id INTEGER PRIMARY KEY CHECK (id = 1),
191 json TEXT NOT NULL
192 );
193 CREATE TABLE IF NOT EXISTS files (
194 id TEXT PRIMARY KEY,
195 path TEXT NOT NULL UNIQUE,
196 json TEXT NOT NULL
197 );
198 CREATE TABLE IF NOT EXISTS symbols (
199 id TEXT PRIMARY KEY,
200 name TEXT NOT NULL,
201 qualified_name TEXT NOT NULL,
202 file_id TEXT NOT NULL,
203 json TEXT NOT NULL
204 );
205 CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
206 CREATE TABLE IF NOT EXISTS chunks (
207 id TEXT PRIMARY KEY,
208 file_id TEXT NOT NULL,
209 start_line INTEGER NOT NULL,
210 end_line INTEGER NOT NULL,
211 text TEXT NOT NULL,
212 json TEXT NOT NULL
213 );
214 CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_id);
215 CREATE TABLE IF NOT EXISTS tests (
216 id TEXT PRIMARY KEY,
217 file_id TEXT NOT NULL,
218 json TEXT NOT NULL
219 );
220 CREATE INDEX IF NOT EXISTS idx_tests_file ON tests(file_id);
221 CREATE TABLE IF NOT EXISTS imports (
222 id TEXT PRIMARY KEY,
223 file_id TEXT NOT NULL,
224 imported TEXT NOT NULL,
225 json TEXT NOT NULL
226 );
227 CREATE INDEX IF NOT EXISTS idx_imports_file ON imports(file_id);
228 CREATE TABLE IF NOT EXISTS occurrences (
229 id TEXT PRIMARY KEY,
230 symbol_id TEXT NOT NULL,
231 file_id TEXT NOT NULL,
232 is_definition INTEGER NOT NULL,
233 json TEXT NOT NULL
234 );
235 CREATE INDEX IF NOT EXISTS idx_occurrences_symbol ON occurrences(symbol_id);
236 CREATE INDEX IF NOT EXISTS idx_occurrences_file ON occurrences(file_id);
237 CREATE TABLE IF NOT EXISTS analysis_facts (
238 id TEXT PRIMARY KEY,
239 file_id TEXT NOT NULL,
240 source_type TEXT NOT NULL,
241 target TEXT NOT NULL,
242 json TEXT NOT NULL
243 );
244 CREATE INDEX IF NOT EXISTS idx_analysis_facts_file ON analysis_facts(file_id);
245 CREATE INDEX IF NOT EXISTS idx_analysis_facts_source ON analysis_facts(source_type);
246 CREATE TABLE IF NOT EXISTS vector_targets (
247 id TEXT PRIMARY KEY,
248 file_id TEXT NOT NULL,
249 target_kind TEXT NOT NULL,
250 content_hash TEXT NOT NULL,
251 vector_id INTEGER NOT NULL,
252 model TEXT NOT NULL,
253 dimensions INTEGER NOT NULL,
254 json TEXT NOT NULL
255 );
256 CREATE INDEX IF NOT EXISTS idx_vector_targets_file ON vector_targets(file_id);
257 CREATE TABLE IF NOT EXISTS embedding_cache (
258 cache_key TEXT PRIMARY KEY,
259 target_id TEXT NOT NULL,
260 content_hash TEXT NOT NULL,
261 model TEXT NOT NULL,
262 dimensions INTEGER NOT NULL,
263 json TEXT NOT NULL
264 );
265 CREATE TABLE IF NOT EXISTS semantic_index_runs (
266 id TEXT PRIMARY KEY,
267 status TEXT NOT NULL,
268 model TEXT NOT NULL,
269 dimensions INTEGER NOT NULL,
270 vector_count INTEGER NOT NULL,
271 created_at TEXT NOT NULL,
272 json TEXT NOT NULL
273 );
274 CREATE TABLE IF NOT EXISTS semantic_coverage (
275 id TEXT PRIMARY KEY,
276 target_kind TEXT NOT NULL,
277 indexed_count INTEGER NOT NULL,
278 stale_count INTEGER NOT NULL,
279 failed_count INTEGER NOT NULL,
280 json TEXT NOT NULL
281 );
282 CREATE TABLE IF NOT EXISTS graph_nodes (
283 id TEXT PRIMARY KEY,
284 label TEXT NOT NULL,
285 node_type TEXT DEFAULT '',
286 file_id TEXT DEFAULT '',
287 symbol_id TEXT DEFAULT '',
288 json TEXT NOT NULL
289 );
290 CREATE TABLE IF NOT EXISTS graph_edges (
291 id TEXT PRIMARY KEY,
292 from_id TEXT NOT NULL,
293 to_id TEXT NOT NULL,
294 edge_type TEXT NOT NULL,
295 confidence TEXT DEFAULT '',
296 source_type TEXT DEFAULT '',
297 source_file TEXT DEFAULT '',
298 json TEXT NOT NULL
299 );
300 CREATE INDEX IF NOT EXISTS idx_graph_edges_from ON graph_edges(from_id);
301 CREATE INDEX IF NOT EXISTS idx_graph_edges_to ON graph_edges(to_id);
302 "#,
303 )
304 .map_err(storage_err)?;
305 migrate_history_schema(&mut conn)?;
306 migrate_graph_schema(&mut conn)?;
307 Ok(())
308 }
309
310 fn put_manifest(&self, manifest: &IndexManifest) -> Result<()> {
311 let conn = self
312 .connection
313 .lock()
314 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
315 let json = serde_json::to_string(manifest)?;
316 conn.execute(
317 "INSERT INTO manifests(id, json) VALUES(1, ?1) ON CONFLICT(id) DO UPDATE SET json = excluded.json",
318 params![json],
319 )
320 .map_err(storage_err)?;
321 Ok(())
322 }
323
324 fn manifest(&self) -> Result<Option<IndexManifest>> {
325 let conn = self
326 .connection
327 .lock()
328 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
329 let raw: Option<String> = conn
330 .query_row("SELECT json FROM manifests WHERE id = 1", [], |row| {
331 row.get(0)
332 })
333 .optional()
334 .map_err(storage_err)?;
335 raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
336 .transpose()
337 }
338
339 fn replace_index(&self, data: IndexData<'_>) -> Result<()> {
340 let mut conn = self
341 .connection
342 .lock()
343 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
344 let tx = conn.transaction().map_err(storage_err)?;
345 tx.execute("DELETE FROM occurrences", [])
346 .map_err(storage_err)?;
347 tx.execute("DELETE FROM analysis_facts", [])
348 .map_err(storage_err)?;
349 tx.execute("DELETE FROM imports", []).map_err(storage_err)?;
350 tx.execute("DELETE FROM tests", []).map_err(storage_err)?;
351 tx.execute("DELETE FROM chunks", []).map_err(storage_err)?;
352 tx.execute("DELETE FROM symbols", []).map_err(storage_err)?;
353 tx.execute("DELETE FROM files", []).map_err(storage_err)?;
354 tx.execute("DELETE FROM manifests", [])
355 .map_err(storage_err)?;
356 tx.execute(
357 "INSERT INTO manifests(id, json) VALUES(1, ?1)",
358 params![serde_json::to_string(data.manifest)?],
359 )
360 .map_err(storage_err)?;
361 insert_index_rows(
362 &tx,
363 IndexRows {
364 files: data.files,
365 symbols: data.symbols,
366 chunks: data.chunks,
367 tests: data.tests,
368 imports: data.imports,
369 occurrences: data.occurrences,
370 analysis_facts: data.analysis_facts,
371 },
372 )?;
373 tx.commit().map_err(storage_err)?;
374 Ok(())
375 }
376
377 fn replace_files_index(&self, update: PartialIndexUpdate<'_>) -> Result<()> {
378 let mut conn = self
379 .connection
380 .lock()
381 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
382 let tx = conn.transaction().map_err(storage_err)?;
383 let affected_file_ids = update
384 .changed_files
385 .iter()
386 .map(|file| file.id.clone())
387 .chain(update.deleted_file_ids.iter().cloned())
388 .collect::<BTreeSet<_>>();
389 let mut affected_file_paths = update
390 .changed_files
391 .iter()
392 .map(|file| file.path.to_string_lossy().to_string())
393 .collect::<BTreeSet<_>>();
394 for file_id in &affected_file_ids {
395 let path: Option<String> = tx
396 .query_row(
397 "SELECT path FROM files WHERE id = ?1",
398 params![&file_id.0],
399 |row| row.get(0),
400 )
401 .optional()
402 .map_err(storage_err)?;
403 if let Some(path) = path {
404 affected_file_paths.insert(path);
405 }
406 }
407
408 let mut affected_symbol_ids = update
409 .symbols
410 .iter()
411 .map(|symbol| symbol.id.clone())
412 .collect::<BTreeSet<_>>();
413 for file_id in &affected_file_ids {
414 let mut stmt = tx
415 .prepare("SELECT id FROM symbols WHERE file_id = ?1")
416 .map_err(storage_err)?;
417 let rows = stmt
418 .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
419 .map_err(storage_err)?;
420 for row in rows {
421 affected_symbol_ids.insert(SymbolId::new(row.map_err(storage_err)?));
422 }
423 }
424
425 let mut affected_node_ids = update
426 .graph_nodes
427 .iter()
428 .map(|node| node.id.0.clone())
429 .collect::<BTreeSet<_>>();
430 for file_id in &affected_file_ids {
431 let mut stmt = tx
432 .prepare("SELECT id FROM graph_nodes WHERE file_id = ?1")
433 .map_err(storage_err)?;
434 let rows = stmt
435 .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
436 .map_err(storage_err)?;
437 for row in rows {
438 affected_node_ids.insert(row.map_err(storage_err)?);
439 }
440 }
441 for symbol_id in &affected_symbol_ids {
442 let mut stmt = tx
443 .prepare("SELECT id FROM graph_nodes WHERE symbol_id = ?1")
444 .map_err(storage_err)?;
445 let rows = stmt
446 .query_map(params![&symbol_id.0], |row| row.get::<_, String>(0))
447 .map_err(storage_err)?;
448 for row in rows {
449 affected_node_ids.insert(row.map_err(storage_err)?);
450 }
451 }
452
453 tx.execute(
454 "INSERT INTO manifests(id, json) VALUES(1, ?1)
455 ON CONFLICT(id) DO UPDATE SET json = excluded.json",
456 params![serde_json::to_string(update.manifest)?],
457 )
458 .map_err(storage_err)?;
459
460 for node_id in &affected_node_ids {
461 tx.execute(
462 "DELETE FROM graph_edges WHERE from_id = ?1 OR to_id = ?1",
463 params![node_id],
464 )
465 .map_err(storage_err)?;
466 }
467 for path in &affected_file_paths {
468 tx.execute(
469 "DELETE FROM graph_edges WHERE source_file = ?1",
470 params![path],
471 )
472 .map_err(storage_err)?;
473 }
474 for node_id in &affected_node_ids {
475 tx.execute("DELETE FROM graph_nodes WHERE id = ?1", params![node_id])
476 .map_err(storage_err)?;
477 }
478 for file_id in &affected_file_ids {
479 tx.execute(
480 "DELETE FROM graph_nodes WHERE file_id = ?1",
481 params![&file_id.0],
482 )
483 .map_err(storage_err)?;
484 }
485 for symbol_id in &affected_symbol_ids {
486 tx.execute(
487 "DELETE FROM graph_nodes WHERE symbol_id = ?1",
488 params![&symbol_id.0],
489 )
490 .map_err(storage_err)?;
491 }
492
493 for symbol_id in &affected_symbol_ids {
494 tx.execute(
495 "DELETE FROM occurrences WHERE symbol_id = ?1",
496 params![&symbol_id.0],
497 )
498 .map_err(storage_err)?;
499 }
500 for file_id in &affected_file_ids {
501 tx.execute(
502 "DELETE FROM occurrences WHERE file_id = ?1",
503 params![&file_id.0],
504 )
505 .map_err(storage_err)?;
506 tx.execute(
507 "DELETE FROM analysis_facts WHERE file_id = ?1",
508 params![&file_id.0],
509 )
510 .map_err(storage_err)?;
511 tx.execute(
512 "DELETE FROM imports WHERE file_id = ?1",
513 params![&file_id.0],
514 )
515 .map_err(storage_err)?;
516 tx.execute("DELETE FROM tests WHERE file_id = ?1", params![&file_id.0])
517 .map_err(storage_err)?;
518 tx.execute("DELETE FROM chunks WHERE file_id = ?1", params![&file_id.0])
519 .map_err(storage_err)?;
520 tx.execute(
521 "DELETE FROM symbols WHERE file_id = ?1",
522 params![&file_id.0],
523 )
524 .map_err(storage_err)?;
525 tx.execute("DELETE FROM files WHERE id = ?1", params![&file_id.0])
526 .map_err(storage_err)?;
527 }
528
529 insert_index_rows(
530 &tx,
531 IndexRows {
532 files: update.changed_files,
533 symbols: update.symbols,
534 chunks: update.chunks,
535 tests: update.tests,
536 imports: update.imports,
537 occurrences: update.occurrences,
538 analysis_facts: update.analysis_facts,
539 },
540 )?;
541 insert_graph_rows(&tx, update.graph_nodes, update.graph_edges)?;
542 tx.commit().map_err(storage_err)?;
543 Ok(())
544 }
545
546 fn list_files(&self, limit: usize, offset: usize) -> Result<Vec<File>> {
547 let conn = self
548 .connection
549 .lock()
550 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
551 let mut stmt = conn
552 .prepare("SELECT json FROM files ORDER BY path LIMIT ?1 OFFSET ?2")
553 .map_err(storage_err)?;
554 let rows = stmt
555 .query_map(params![limit as i64, offset as i64], |row| {
556 row.get::<_, String>(0)
557 })
558 .map_err(storage_err)?;
559 collect_json(rows)
560 }
561
562 fn get_file_by_path(&self, path: &Path) -> Result<Option<File>> {
563 let conn = self
564 .connection
565 .lock()
566 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
567 let raw: Option<String> = conn
568 .query_row(
569 "SELECT json FROM files WHERE path = ?1",
570 params![path.to_string_lossy().as_ref()],
571 |row| row.get(0),
572 )
573 .optional()
574 .map_err(storage_err)?;
575 raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
576 .transpose()
577 }
578
579 fn list_symbols(
580 &self,
581 query: Option<&str>,
582 limit: usize,
583 offset: usize,
584 ) -> Result<Vec<Symbol>> {
585 let conn = self
586 .connection
587 .lock()
588 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
589 let pattern = format!("%{}%", query.unwrap_or_default());
590 let mut stmt = conn
591 .prepare(
592 "SELECT json FROM symbols WHERE (?1 = '%%' OR name LIKE ?1 COLLATE NOCASE OR qualified_name LIKE ?1 COLLATE NOCASE) ORDER BY qualified_name LIMIT ?2 OFFSET ?3",
593 )
594 .map_err(storage_err)?;
595 let rows = stmt
596 .query_map(params![pattern, limit as i64, offset as i64], |row| {
597 row.get::<_, String>(0)
598 })
599 .map_err(storage_err)?;
600 collect_json(rows)
601 }
602
603 fn symbol_by_id(&self, id: &SymbolId) -> Result<Option<Symbol>> {
604 let conn = self
605 .connection
606 .lock()
607 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
608 let raw: Option<String> = conn
609 .query_row(
610 "SELECT json FROM symbols WHERE id = ?1",
611 params![&id.0],
612 |row| row.get(0),
613 )
614 .optional()
615 .map_err(storage_err)?;
616 raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
617 .transpose()
618 }
619
620 fn chunks_for_file(&self, file_id: &FileId) -> Result<Vec<CodeChunk>> {
621 let conn = self
622 .connection
623 .lock()
624 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
625 let mut stmt = conn
626 .prepare("SELECT json FROM chunks WHERE file_id = ?1 ORDER BY start_line")
627 .map_err(storage_err)?;
628 let rows = stmt
629 .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
630 .map_err(storage_err)?;
631 collect_json(rows)
632 }
633
634 fn all_chunks(&self) -> Result<Vec<CodeChunk>> {
635 let conn = self
636 .connection
637 .lock()
638 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
639 let mut stmt = conn
640 .prepare("SELECT json FROM chunks ORDER BY file_id, start_line")
641 .map_err(storage_err)?;
642 let rows = stmt
643 .query_map([], |row| row.get::<_, String>(0))
644 .map_err(storage_err)?;
645 collect_json(rows)
646 }
647
648 fn tests(&self) -> Result<Vec<TestTarget>> {
649 let conn = self
650 .connection
651 .lock()
652 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
653 let mut stmt = conn
654 .prepare("SELECT json FROM tests ORDER BY file_id")
655 .map_err(storage_err)?;
656 let rows = stmt
657 .query_map([], |row| row.get::<_, String>(0))
658 .map_err(storage_err)?;
659 collect_json(rows)
660 }
661
662 fn imports(&self) -> Result<Vec<Import>> {
663 let conn = self
664 .connection
665 .lock()
666 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
667 let mut stmt = conn
668 .prepare("SELECT json FROM imports ORDER BY file_id")
669 .map_err(storage_err)?;
670 let rows = stmt
671 .query_map([], |row| row.get::<_, String>(0))
672 .map_err(storage_err)?;
673 collect_json(rows)
674 }
675
676 fn analysis_facts(
677 &self,
678 source_type: Option<EvidenceSourceType>,
679 limit: usize,
680 ) -> Result<Vec<AnalysisFact>> {
681 let conn = self
682 .connection
683 .lock()
684 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
685 let limit = limit.min(i64::MAX as usize) as i64;
686 let rows = if let Some(source_type) = source_type {
687 let mut stmt = conn
688 .prepare(
689 "SELECT json FROM analysis_facts WHERE source_type = ?1 ORDER BY file_id, target LIMIT ?2",
690 )
691 .map_err(storage_err)?;
692 let rows = stmt
693 .query_map(params![source_type_name(&source_type), limit], |row| {
694 row.get::<_, String>(0)
695 })
696 .map_err(storage_err)?;
697 collect_json(rows)?
698 } else {
699 let mut stmt = conn
700 .prepare("SELECT json FROM analysis_facts ORDER BY file_id, target LIMIT ?1")
701 .map_err(storage_err)?;
702 let rows = stmt
703 .query_map(params![limit], |row| row.get::<_, String>(0))
704 .map_err(storage_err)?;
705 collect_json(rows)?
706 };
707 Ok(rows)
708 }
709
710 fn references_for_symbol(&self, id: &SymbolId, limit: usize) -> Result<Vec<SymbolOccurrence>> {
711 let conn = self
712 .connection
713 .lock()
714 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
715 let mut stmt = conn
716 .prepare(
717 "SELECT json FROM occurrences WHERE symbol_id = ?1 AND is_definition = 0 ORDER BY file_id LIMIT ?2",
718 )
719 .map_err(storage_err)?;
720 let rows = stmt
721 .query_map(params![&id.0, limit as i64], |row| row.get::<_, String>(0))
722 .map_err(storage_err)?;
723 collect_json(rows)
724 }
725
726 fn occurrences_for_file(&self, file_id: &FileId) -> Result<Vec<SymbolOccurrence>> {
727 let conn = self
728 .connection
729 .lock()
730 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
731 let mut stmt = conn
732 .prepare("SELECT json FROM occurrences WHERE file_id = ?1 ORDER BY symbol_id")
733 .map_err(storage_err)?;
734 let rows = stmt
735 .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
736 .map_err(storage_err)?;
737 collect_json(rows)
738 }
739
740 fn symbols_for_file(&self, file_id: &FileId) -> Result<Vec<Symbol>> {
741 let conn = self
742 .connection
743 .lock()
744 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
745 let mut stmt = conn
746 .prepare("SELECT json FROM symbols WHERE file_id = ?1 ORDER BY name")
747 .map_err(storage_err)?;
748 let rows = stmt
749 .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
750 .map_err(storage_err)?;
751 collect_json(rows)
752 }
753
754 fn find_chunks_containing(&self, query: &str, limit: usize) -> Result<Vec<CodeChunk>> {
755 let conn = self
756 .connection
757 .lock()
758 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
759 let pattern = format!("%{}%", query);
760 let mut stmt = conn
761 .prepare("SELECT json FROM chunks WHERE text LIKE ?1 LIMIT ?2")
762 .map_err(storage_err)?;
763 let rows = stmt
764 .query_map(params![pattern, limit as i64], |row| {
765 row.get::<_, String>(0)
766 })
767 .map_err(storage_err)?;
768 collect_json(rows)
769 }
770
771 fn find_files_by_path_pattern(&self, pattern: &str) -> Result<Vec<File>> {
772 let conn = self
773 .connection
774 .lock()
775 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
776 let match_pat = format!("%{}%", pattern);
777 let mut stmt = conn
778 .prepare("SELECT json FROM files WHERE path LIKE ?1 COLLATE NOCASE")
779 .map_err(storage_err)?;
780 let rows = stmt
781 .query_map(params![match_pat], |row| row.get::<_, String>(0))
782 .map_err(storage_err)?;
783 collect_json(rows)
784 }
785
786 fn tests_for_files(&self, file_ids: &[FileId]) -> Result<Vec<TestTarget>> {
787 if file_ids.is_empty() {
788 return Ok(Vec::new());
789 }
790 let conn = self
791 .connection
792 .lock()
793 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
794
795 let placeholders = file_ids.iter().map(|_| "?").collect::<Vec<_>>().join(",");
796 let sql = format!("SELECT json FROM tests WHERE file_id IN ({})", placeholders);
797 let mut stmt = conn.prepare(&sql).map_err(storage_err)?;
798
799 let params = rusqlite::params_from_iter(file_ids.iter().map(|id| &id.0));
800 let rows = stmt
801 .query_map(params, |row| row.get::<_, String>(0))
802 .map_err(storage_err)?;
803 collect_json(rows)
804 }
805}
806
807impl HistoryStore for SqliteStore {
808 fn put_history_snapshot(&self, snapshot: &HistorySnapshot) -> Result<()> {
809 validate_history_snapshot(snapshot)?;
810 let mut conn = self
811 .connection
812 .lock()
813 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
814 let tx = conn.transaction().map_err(storage_err)?;
815
816 tx.execute("DELETE FROM git_review_events", [])
817 .map_err(storage_err)?;
818 tx.execute("DELETE FROM history_hotspots", [])
819 .map_err(storage_err)?;
820 tx.execute("DELETE FROM git_cochange_edges", [])
821 .map_err(storage_err)?;
822 tx.execute("DELETE FROM git_symbol_touches", [])
823 .map_err(storage_err)?;
824 tx.execute("DELETE FROM git_file_touches", [])
825 .map_err(storage_err)?;
826 tx.execute("DELETE FROM git_commits", [])
827 .map_err(storage_err)?;
828
829 for commit in &snapshot.commits {
830 tx.execute(
831 "INSERT INTO git_commits(id, authored_at, committed_at, author_email, json) VALUES(?1, ?2, ?3, ?4, ?5)",
832 params![
833 &commit.id.0,
834 commit.authored_at.to_rfc3339(),
835 commit.committed_at.to_rfc3339(),
836 commit.author.email.as_deref(),
837 serde_json::to_string(commit)?,
838 ],
839 )
840 .map_err(storage_err)?;
841 }
842 for touch in &snapshot.file_touches {
843 tx.execute(
844 "INSERT INTO git_file_touches(id, commit_id, path, previous_path, touched_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6)",
845 params![
846 &touch.id.0,
847 &touch.commit_id.0,
848 history_path(&touch.path)?,
849 touch
850 .previous_path
851 .as_deref()
852 .map(history_path)
853 .transpose()?,
854 touch.touched_at.to_rfc3339(),
855 serde_json::to_string(touch)?,
856 ],
857 )
858 .map_err(storage_err)?;
859 }
860 for touch in &snapshot.symbol_touches {
861 tx.execute(
862 "INSERT INTO git_symbol_touches(id, commit_id, symbol_id, qualified_name, file_path, touched_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7)",
863 params![
864 &touch.id.0,
865 &touch.commit_id.0,
866 touch.symbol_id.as_ref().map(|id| id.0.as_str()),
867 &touch.qualified_name,
868 history_path(&touch.file_path)?,
869 touch.touched_at.to_rfc3339(),
870 serde_json::to_string(touch)?,
871 ],
872 )
873 .map_err(storage_err)?;
874 }
875 for edge in &snapshot.cochange_edges {
876 tx.execute(
877 "INSERT INTO git_cochange_edges(id, path, cochanged_path, commit_count, recency_weight, last_changed_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7)",
878 params![
879 &edge.id.0,
880 history_path(&edge.path)?,
881 history_path(&edge.cochanged_path)?,
882 usize_to_i64(edge.commit_count, "co-change commit count")?,
883 edge.recency_weight,
884 edge.last_changed_at.map(|value| value.to_rfc3339()),
885 serde_json::to_string(edge)?,
886 ],
887 )
888 .map_err(storage_err)?;
889 }
890 for evidence in &snapshot.reviewer_evidence {
891 let reviewer_identity = evidence
892 .reviewer
893 .email
894 .as_deref()
895 .unwrap_or(&evidence.reviewer.name);
896 tx.execute(
897 "INSERT INTO git_review_events(id, commit_id, path, reviewer_identity, observed_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6)",
898 params![
899 &evidence.id.0,
900 evidence.commit_id.as_ref().map(|id| id.0.as_str()),
901 evidence.path.as_deref().map(history_path).transpose()?,
902 reviewer_identity,
903 evidence.observed_at.to_rfc3339(),
904 serde_json::to_string(evidence)?,
905 ],
906 )
907 .map_err(storage_err)?;
908 }
909 for summary in materialize_churn_summaries(snapshot)? {
910 tx.execute(
911 "INSERT INTO history_hotspots(entity_kind, entity_key, path, symbol_id, qualified_name, hotspot_score, touch_count, generated_at, json)
912 VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
913 params![
914 churn_entity_kind_key(summary.entity_kind),
915 &summary.key,
916 summary.path.as_deref().map(history_path).transpose()?,
917 summary.symbol_id.as_ref().map(|id| id.0.as_str()),
918 summary.qualified_name.as_deref(),
919 summary.stats.hotspot_score,
920 usize_to_i64(summary.stats.touch_count, "history hotspot touch count")?,
921 summary.generated_at.to_rfc3339(),
922 serde_json::to_string(&summary)?,
923 ],
924 )
925 .map_err(storage_err)?;
926 }
927
928 tx.commit().map_err(storage_err)?;
929 Ok(())
930 }
931
932 fn history_for_file(&self, path: &Path, limit: usize) -> Result<HistorySummary> {
933 let normalized_path = history_path(path)?;
934 if limit == 0 {
935 return Ok(HistorySummary {
936 path: path.to_path_buf(),
937 recent_commits: Vec::new(),
938 file_touches: Vec::new(),
939 symbol_touches: Vec::new(),
940 cochange_neighbors: Vec::new(),
941 reviewer_evidence: Vec::new(),
942 truncated: false,
943 uncertainty: vec!["history query limit is zero".into()],
944 });
945 }
946
947 let conn = self
948 .connection
949 .lock()
950 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
951 let query_limit = history_query_limit(limit);
952
953 let mut commit_stmt = conn
954 .prepare(
955 "SELECT c.json FROM git_commits c
956 WHERE EXISTS (
957 SELECT 1 FROM git_file_touches t
958 WHERE t.commit_id = c.id AND (t.path = ?1 OR t.previous_path = ?1)
959 )
960 ORDER BY c.committed_at DESC, c.id
961 LIMIT ?2",
962 )
963 .map_err(storage_err)?;
964 let commit_rows = commit_stmt
965 .query_map(params![&normalized_path, query_limit], |row| {
966 row.get::<_, String>(0)
967 })
968 .map_err(storage_err)?;
969 let (recent_commits, commits_truncated) = collect_limited_json(commit_rows, limit)?;
970
971 let mut file_touch_stmt = conn
972 .prepare(
973 "SELECT json FROM git_file_touches
974 WHERE path = ?1 OR previous_path = ?1
975 ORDER BY touched_at DESC, id
976 LIMIT ?2",
977 )
978 .map_err(storage_err)?;
979 let file_touch_rows = file_touch_stmt
980 .query_map(params![&normalized_path, query_limit], |row| {
981 row.get::<_, String>(0)
982 })
983 .map_err(storage_err)?;
984 let (file_touches, file_touches_truncated) = collect_limited_json(file_touch_rows, limit)?;
985
986 let mut symbol_touch_stmt = conn
987 .prepare(
988 "SELECT json FROM git_symbol_touches
989 WHERE file_path = ?1
990 ORDER BY touched_at DESC, id
991 LIMIT ?2",
992 )
993 .map_err(storage_err)?;
994 let symbol_touch_rows = symbol_touch_stmt
995 .query_map(params![&normalized_path, query_limit], |row| {
996 row.get::<_, String>(0)
997 })
998 .map_err(storage_err)?;
999 let (symbol_touches, symbol_touches_truncated) =
1000 collect_limited_json(symbol_touch_rows, limit)?;
1001
1002 let mut cochange_stmt = conn
1003 .prepare(
1004 "SELECT json FROM git_cochange_edges
1005 WHERE path = ?1
1006 ORDER BY recency_weight DESC, commit_count DESC, cochanged_path
1007 LIMIT ?2",
1008 )
1009 .map_err(storage_err)?;
1010 let cochange_rows = cochange_stmt
1011 .query_map(params![&normalized_path, query_limit], |row| {
1012 row.get::<_, String>(0)
1013 })
1014 .map_err(storage_err)?;
1015 let (cochange_neighbors, cochange_truncated) = collect_limited_json(cochange_rows, limit)?;
1016
1017 let mut reviewer_stmt = conn
1018 .prepare(
1019 "SELECT e.json FROM git_review_events e
1020 WHERE e.path = ?1
1021 OR (
1022 e.path IS NULL
1023 AND e.commit_id IN (
1024 SELECT t.commit_id FROM git_file_touches t
1025 WHERE t.path = ?1 OR t.previous_path = ?1
1026 )
1027 )
1028 ORDER BY e.observed_at DESC, e.id
1029 LIMIT ?2",
1030 )
1031 .map_err(storage_err)?;
1032 let reviewer_rows = reviewer_stmt
1033 .query_map(params![&normalized_path, query_limit], |row| {
1034 row.get::<_, String>(0)
1035 })
1036 .map_err(storage_err)?;
1037 let (reviewer_evidence, reviewers_truncated) = collect_limited_json(reviewer_rows, limit)?;
1038
1039 let truncated = commits_truncated
1040 || file_touches_truncated
1041 || symbol_touches_truncated
1042 || cochange_truncated
1043 || reviewers_truncated;
1044 let mut uncertainty = Vec::new();
1045 if recent_commits.is_empty()
1046 && file_touches.is_empty()
1047 && symbol_touches.is_empty()
1048 && cochange_neighbors.is_empty()
1049 && reviewer_evidence.is_empty()
1050 {
1051 uncertainty.push("no persisted history evidence is available for this path".into());
1052 } else {
1053 if symbol_touches.is_empty() {
1054 uncertainty.push("no symbol-level history is stored for this path".into());
1055 }
1056 if reviewer_evidence.is_empty() {
1057 uncertainty.push("no reviewer or owner evidence is stored for this path".into());
1058 }
1059 }
1060 if truncated {
1061 uncertainty.push(format!(
1062 "history results are truncated to {limit} records per category"
1063 ));
1064 }
1065
1066 Ok(HistorySummary {
1067 path: path.to_path_buf(),
1068 recent_commits,
1069 file_touches,
1070 symbol_touches,
1071 cochange_neighbors,
1072 reviewer_evidence,
1073 truncated,
1074 uncertainty,
1075 })
1076 }
1077
1078 fn provenance_for_path(&self, path: &Path, limit: usize) -> Result<FileProvenance> {
1079 let normalized_path = history_path(path)?;
1080 if limit == 0 {
1081 return Ok(FileProvenance {
1082 path: path.to_path_buf(),
1083 first_seen: None,
1084 last_touched: None,
1085 recent_touches: Vec::new(),
1086 confidence: Confidence::Low,
1087 truncated: false,
1088 uncertainty: vec!["provenance query limit is zero".into()],
1089 });
1090 }
1091
1092 let conn = self
1093 .connection
1094 .lock()
1095 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1096 let query_limit = history_query_limit(limit);
1097 let aliases = "
1098 WITH RECURSIVE aliases(path) AS (
1099 SELECT ?1
1100 UNION
1101 SELECT t.previous_path
1102 FROM git_file_touches t JOIN aliases a ON t.path = a.path
1103 WHERE t.previous_path IS NOT NULL
1104 UNION
1105 SELECT t.path
1106 FROM git_file_touches t JOIN aliases a ON t.previous_path = a.path
1107 )";
1108 let recent_sql = format!(
1109 "{aliases}
1110 SELECT DISTINCT t.json, c.json
1111 FROM git_file_touches t
1112 JOIN git_commits c ON c.id = t.commit_id
1113 WHERE t.path IN aliases OR t.previous_path IN aliases
1114 ORDER BY t.touched_at DESC, t.id
1115 LIMIT ?2"
1116 );
1117 let mut recent_stmt = conn.prepare(&recent_sql).map_err(storage_err)?;
1118 let rows = recent_stmt
1119 .query_map(params![&normalized_path, query_limit], |row| {
1120 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1121 })
1122 .map_err(storage_err)?;
1123 let mut recent_touches = collect_provenance_rows(rows, file_provenance_touch)?;
1124 let truncated = recent_touches.len() > limit;
1125 recent_touches.truncate(limit);
1126
1127 let first_sql = format!(
1128 "{aliases}
1129 SELECT DISTINCT t.json, c.json
1130 FROM git_file_touches t
1131 JOIN git_commits c ON c.id = t.commit_id
1132 WHERE t.path IN aliases OR t.previous_path IN aliases
1133 ORDER BY t.touched_at ASC, t.id
1134 LIMIT 1"
1135 );
1136 let first_seen = conn
1137 .query_row(&first_sql, params![&normalized_path], |row| {
1138 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1139 })
1140 .optional()
1141 .map_err(storage_err)?
1142 .map(|(touch, commit)| file_provenance_touch(&touch, &commit))
1143 .transpose()?;
1144 let last_touched = recent_touches.first().cloned();
1145 let mut uncertainty = Vec::new();
1146 if first_seen.is_none() {
1147 uncertainty.push("no persisted commit provenance is available for this path".into());
1148 } else if first_seen
1149 .as_ref()
1150 .is_some_and(|touch| touch.change_kind != open_kioku_core::GitChangeKind::Added)
1151 {
1152 uncertainty.push(
1153 "first_seen is the earliest persisted touch in the configured local history window, not a proven file-creation commit"
1154 .into(),
1155 );
1156 }
1157 if truncated {
1158 uncertainty.push(format!(
1159 "recent provenance is truncated to {limit} touch records"
1160 ));
1161 }
1162
1163 let confidence = if uncertainty.is_empty() {
1164 Confidence::Exact
1165 } else if last_touched.is_some() {
1166 Confidence::High
1167 } else {
1168 Confidence::Low
1169 };
1170 Ok(FileProvenance {
1171 path: path.to_path_buf(),
1172 first_seen,
1173 last_touched,
1174 recent_touches,
1175 confidence,
1176 truncated,
1177 uncertainty,
1178 })
1179 }
1180
1181 fn churn_for_file(&self, path: &Path) -> Result<ChurnSummary> {
1182 let normalized_path = history_path(path)?;
1183 self.churn_by_kind_and_key(ChurnEntityKind::File, &normalized_path, || {
1184 ChurnSummary::missing(ChurnEntityKind::File, normalized_path.clone())
1185 })
1186 }
1187
1188 fn churn_for_module(&self, module: &Path) -> Result<ChurnSummary> {
1189 let normalized_module = if module == Path::new(".") || module.as_os_str().is_empty() {
1190 "__root__".to_string()
1191 } else {
1192 history_path(module)?
1193 };
1194 self.churn_by_kind_and_key(ChurnEntityKind::Module, &normalized_module, || {
1195 ChurnSummary::missing(ChurnEntityKind::Module, normalized_module.clone())
1196 })
1197 }
1198
1199 fn churn_for_symbol(&self, symbol_id: &SymbolId) -> Result<ChurnSummary> {
1200 self.churn_by_kind_and_key(ChurnEntityKind::Symbol, &symbol_id.0, || {
1201 let mut summary = ChurnSummary::missing(ChurnEntityKind::Symbol, symbol_id.0.clone());
1202 summary.symbol_id = Some(symbol_id.clone());
1203 summary.uncertainty =
1204 vec!["no persisted symbol-level churn is available for this symbol".into()];
1205 summary
1206 })
1207 }
1208
1209 fn provenance_for_symbol(
1210 &self,
1211 symbol_id: &SymbolId,
1212 limit: usize,
1213 ) -> Result<SymbolProvenance> {
1214 let conn = self
1215 .connection
1216 .lock()
1217 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1218 let symbol_json: Option<String> = conn
1219 .query_row(
1220 "SELECT json FROM symbols WHERE id = ?1",
1221 params![&symbol_id.0],
1222 |row| row.get(0),
1223 )
1224 .optional()
1225 .map_err(storage_err)?;
1226 let Some(symbol_json) = symbol_json else {
1227 return Err(OkError::SymbolNotFound(symbol_id.0.clone()));
1228 };
1229 let symbol: Symbol = serde_json::from_str(&symbol_json)?;
1230 let file_path: String = conn
1231 .query_row(
1232 "SELECT path FROM files WHERE id = ?1",
1233 params![&symbol.file_id.0],
1234 |row| row.get(0),
1235 )
1236 .map_err(storage_err)?;
1237 if limit == 0 {
1238 return Ok(SymbolProvenance {
1239 symbol_id: symbol.id,
1240 qualified_name: symbol.qualified_name,
1241 file_path: PathBuf::from(file_path),
1242 range: symbol.range,
1243 first_seen: None,
1244 last_touched: None,
1245 recent_touches: Vec::new(),
1246 confidence: Confidence::Low,
1247 truncated: false,
1248 uncertainty: vec!["provenance query limit is zero".into()],
1249 });
1250 }
1251
1252 let query_limit = history_query_limit(limit);
1253 let mut recent_stmt = conn
1254 .prepare(
1255 "SELECT t.json, c.json
1256 FROM git_symbol_touches t
1257 JOIN git_commits c ON c.id = t.commit_id
1258 WHERE t.symbol_id = ?1
1259 ORDER BY t.touched_at DESC, t.id
1260 LIMIT ?2",
1261 )
1262 .map_err(storage_err)?;
1263 let rows = recent_stmt
1264 .query_map(params![&symbol_id.0, query_limit], |row| {
1265 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1266 })
1267 .map_err(storage_err)?;
1268 let mut recent_touches = collect_provenance_rows(rows, symbol_provenance_touch)?;
1269 let truncated = recent_touches.len() > limit;
1270 recent_touches.truncate(limit);
1271 let first_seen = conn
1272 .query_row(
1273 "SELECT t.json, c.json
1274 FROM git_symbol_touches t
1275 JOIN git_commits c ON c.id = t.commit_id
1276 WHERE t.symbol_id = ?1
1277 ORDER BY t.touched_at ASC, t.id
1278 LIMIT 1",
1279 params![&symbol_id.0],
1280 |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)),
1281 )
1282 .optional()
1283 .map_err(storage_err)?
1284 .map(|(touch, commit)| symbol_provenance_touch(&touch, &commit))
1285 .transpose()?;
1286 let last_touched = recent_touches.first().cloned();
1287 let mut uncertainty = recent_touches
1288 .iter()
1289 .flat_map(|touch| touch.uncertainty.clone())
1290 .collect::<Vec<_>>();
1291 if let Some(first_seen) = &first_seen {
1292 uncertainty.extend(first_seen.uncertainty.clone());
1293 uncertainty.push(
1294 "first_seen is the earliest line-mapped touch in the configured local history window; it may not be the symbol-introduction commit"
1295 .into(),
1296 );
1297 } else {
1298 uncertainty
1299 .push("no persisted line-level commit mapping is available for this symbol".into());
1300 }
1301 if symbol.range.is_none() {
1302 uncertainty.push(
1303 "the indexed symbol has no line range, so commit hunks cannot be mapped".into(),
1304 );
1305 }
1306 if truncated {
1307 uncertainty.push(format!(
1308 "recent provenance is truncated to {limit} touch records"
1309 ));
1310 }
1311 uncertainty.sort();
1312 uncertainty.dedup();
1313 let confidence = recent_touches
1314 .iter()
1315 .map(|touch| touch.confidence)
1316 .chain(first_seen.iter().map(|touch| touch.confidence))
1317 .reduce(lower_history_confidence)
1318 .unwrap_or(Confidence::Low);
1319
1320 Ok(SymbolProvenance {
1321 symbol_id: symbol.id,
1322 qualified_name: symbol.qualified_name,
1323 file_path: PathBuf::from(file_path),
1324 range: symbol.range,
1325 first_seen,
1326 last_touched,
1327 recent_touches,
1328 confidence,
1329 truncated,
1330 uncertainty,
1331 })
1332 }
1333
1334 fn similar_changes(
1335 &self,
1336 query: &SimilarChangeQuery,
1337 limit: usize,
1338 ) -> Result<SimilarChangeReport> {
1339 let normalized_query = normalize_similar_change_query(query)?;
1340 if limit == 0 {
1341 return Ok(SimilarChangeReport {
1342 query: normalized_query,
1343 generated_at: Utc::now(),
1344 hits: Vec::new(),
1345 truncated: false,
1346 uncertainty: vec!["similar-change query limit is zero".into()],
1347 });
1348 }
1349
1350 let task_tokens = normalized_query
1351 .task
1352 .as_deref()
1353 .map(tokenize_similarity_text)
1354 .unwrap_or_default();
1355 let query_paths = normalized_query
1356 .paths
1357 .iter()
1358 .map(|path| history_path(path))
1359 .collect::<Result<BTreeSet<_>>>()?;
1360 let symbol_queries = normalized_query
1361 .symbols
1362 .iter()
1363 .map(|symbol| symbol.to_lowercase())
1364 .collect::<BTreeSet<_>>();
1365
1366 if task_tokens.is_empty() && query_paths.is_empty() && symbol_queries.is_empty() {
1367 return Ok(SimilarChangeReport {
1368 query: normalized_query,
1369 generated_at: Utc::now(),
1370 hits: Vec::new(),
1371 truncated: false,
1372 uncertainty: vec![
1373 "provide at least one task, path, or symbol similarity signal".into(),
1374 ],
1375 });
1376 }
1377
1378 let conn = self
1379 .connection
1380 .lock()
1381 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1382 let scan_limit = similar_history_scan_limit(limit);
1383
1384 let commits = load_similarity_commits(&conn, scan_limit)?;
1385 if commits.is_empty() {
1386 return Ok(SimilarChangeReport {
1387 query: normalized_query,
1388 generated_at: Utc::now(),
1389 hits: Vec::new(),
1390 truncated: false,
1391 uncertainty: vec!["no persisted commit history is available".into()],
1392 });
1393 }
1394 let file_touches = load_similarity_file_touches(&conn, scan_limit)?;
1395 let symbol_touches = load_similarity_symbol_touches(&conn, scan_limit)?;
1396 let cochange_edges = load_similarity_cochange_edges(&conn)?;
1397 let hotspots = load_similarity_file_hotspots(&conn)?;
1398
1399 let mut file_touches_by_commit: BTreeMap<String, Vec<GitFileTouch>> = BTreeMap::new();
1400 for touch in file_touches {
1401 file_touches_by_commit
1402 .entry(touch.commit_id.0.clone())
1403 .or_default()
1404 .push(touch);
1405 }
1406
1407 let mut symbol_touches_by_commit: BTreeMap<String, Vec<GitSymbolTouch>> = BTreeMap::new();
1408 for touch in symbol_touches {
1409 symbol_touches_by_commit
1410 .entry(touch.commit_id.0.clone())
1411 .or_default()
1412 .push(touch);
1413 }
1414
1415 let mut query_neighbors: BTreeMap<String, Vec<GitCochangeEdge>> = BTreeMap::new();
1416 let mut sample_edges_by_commit: BTreeMap<String, Vec<GitCochangeEdge>> = BTreeMap::new();
1417 for edge in cochange_edges {
1418 let path = history_path(&edge.path)?;
1419 let cochanged_path = history_path(&edge.cochanged_path)?;
1420 let touches_query_path =
1421 query_paths.contains(&path) || query_paths.contains(&cochanged_path);
1422 if query_paths.contains(&path) {
1423 query_neighbors
1424 .entry(cochanged_path.clone())
1425 .or_default()
1426 .push(edge.clone());
1427 }
1428 if query_paths.contains(&cochanged_path) {
1429 query_neighbors
1430 .entry(path.clone())
1431 .or_default()
1432 .push(edge.clone());
1433 }
1434 if touches_query_path {
1435 for commit_id in &edge.sample_commits {
1436 sample_edges_by_commit
1437 .entry(commit_id.0.clone())
1438 .or_default()
1439 .push(edge.clone());
1440 }
1441 }
1442 }
1443
1444 let query_related_paths = query_paths
1445 .iter()
1446 .cloned()
1447 .chain(query_neighbors.keys().cloned())
1448 .collect::<BTreeSet<_>>();
1449
1450 let mut hits = Vec::new();
1451 for commit in commits {
1452 let file_touches = file_touches_by_commit
1453 .get(&commit.id.0)
1454 .map(Vec::as_slice)
1455 .unwrap_or(&[]);
1456 let symbol_touches = symbol_touches_by_commit
1457 .get(&commit.id.0)
1458 .map(Vec::as_slice)
1459 .unwrap_or(&[]);
1460
1461 let candidate = score_similar_commit(
1462 &normalized_query,
1463 &task_tokens,
1464 &query_paths,
1465 &symbol_queries,
1466 &query_neighbors,
1467 &query_related_paths,
1468 &sample_edges_by_commit,
1469 &hotspots,
1470 &commit,
1471 file_touches,
1472 symbol_touches,
1473 )?;
1474 if candidate.score > 0.0 {
1475 hits.push(candidate);
1476 }
1477 }
1478
1479 hits.sort_by(|left, right| {
1480 right
1481 .score
1482 .total_cmp(&left.score)
1483 .then_with(|| {
1484 history_confidence_rank(right.confidence)
1485 .cmp(&history_confidence_rank(left.confidence))
1486 })
1487 .then_with(|| {
1488 right
1489 .change
1490 .commit
1491 .committed_at
1492 .cmp(&left.change.commit.committed_at)
1493 })
1494 .then_with(|| left.change.commit.id.0.cmp(&right.change.commit.id.0))
1495 });
1496 let truncated = hits.len() > limit;
1497 hits.truncate(limit);
1498
1499 let mut uncertainty = Vec::new();
1500 if hits.is_empty() {
1501 uncertainty.push("no similar historical changes matched the query signals".into());
1502 }
1503 if truncated {
1504 uncertainty.push(format!(
1505 "similar-change results are truncated to {limit} hits"
1506 ));
1507 }
1508
1509 Ok(SimilarChangeReport {
1510 query: normalized_query,
1511 generated_at: Utc::now(),
1512 hits,
1513 truncated,
1514 uncertainty,
1515 })
1516 }
1517
1518 fn cochange_neighbors(&self, path: &Path, limit: usize) -> Result<Vec<GitCochangeEdge>> {
1519 if limit == 0 {
1520 return Ok(Vec::new());
1521 }
1522 let normalized_path = history_path(path)?;
1523 let conn = self
1524 .connection
1525 .lock()
1526 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1527 let mut stmt = conn
1528 .prepare(
1529 "SELECT json FROM git_cochange_edges
1530 WHERE path = ?1
1531 ORDER BY recency_weight DESC, commit_count DESC, cochanged_path
1532 LIMIT ?2",
1533 )
1534 .map_err(storage_err)?;
1535 let rows = stmt
1536 .query_map(
1537 params![normalized_path, limit.min(i64::MAX as usize) as i64],
1538 |row| row.get::<_, String>(0),
1539 )
1540 .map_err(storage_err)?;
1541 collect_json(rows)
1542 }
1543
1544 fn recent_commits(&self, limit: usize) -> Result<Vec<GitCommitRecord>> {
1545 if limit == 0 {
1546 return Ok(Vec::new());
1547 }
1548 let conn = self
1549 .connection
1550 .lock()
1551 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1552 let mut stmt = conn
1553 .prepare("SELECT json FROM git_commits ORDER BY committed_at DESC, id LIMIT ?1")
1554 .map_err(storage_err)?;
1555 let rows = stmt
1556 .query_map(params![limit.min(i64::MAX as usize) as i64], |row| {
1557 row.get::<_, String>(0)
1558 })
1559 .map_err(storage_err)?;
1560 collect_json(rows)
1561 }
1562}
1563
1564fn collect_provenance_rows<F>(
1565 rows: rusqlite::MappedRows<'_, F>,
1566 decode: fn(&str, &str) -> Result<ProvenanceTouch>,
1567) -> Result<Vec<ProvenanceTouch>>
1568where
1569 F: FnMut(&rusqlite::Row<'_>) -> rusqlite::Result<(String, String)>,
1570{
1571 let mut touches = Vec::new();
1572 for row in rows {
1573 let (touch, commit) = row.map_err(storage_err)?;
1574 touches.push(decode(&touch, &commit)?);
1575 }
1576 Ok(touches)
1577}
1578
1579fn file_provenance_touch(touch: &str, commit: &str) -> Result<ProvenanceTouch> {
1580 let touch: GitFileTouch = serde_json::from_str(touch)?;
1581 let commit: GitCommitRecord = serde_json::from_str(commit)?;
1582 Ok(ProvenanceTouch {
1583 commit,
1584 path: touch.path,
1585 previous_path: touch.previous_path,
1586 symbol_id: None,
1587 qualified_name: None,
1588 change_kind: touch.change_kind,
1589 line_ranges: Vec::new(),
1590 confidence: Confidence::Exact,
1591 uncertainty: Vec::new(),
1592 })
1593}
1594
1595fn symbol_provenance_touch(touch: &str, commit: &str) -> Result<ProvenanceTouch> {
1596 let touch: GitSymbolTouch = serde_json::from_str(touch)?;
1597 let commit: GitCommitRecord = serde_json::from_str(commit)?;
1598 Ok(ProvenanceTouch {
1599 commit,
1600 path: touch.file_path,
1601 previous_path: None,
1602 symbol_id: touch.symbol_id,
1603 qualified_name: Some(touch.qualified_name),
1604 change_kind: touch.change_kind,
1605 line_ranges: touch.line_ranges,
1606 confidence: touch.confidence,
1607 uncertainty: touch.uncertainty,
1608 })
1609}
1610
1611fn normalize_similar_change_query(query: &SimilarChangeQuery) -> Result<SimilarChangeQuery> {
1612 let task = query
1613 .task
1614 .as_deref()
1615 .map(str::trim)
1616 .filter(|value| !value.is_empty())
1617 .map(str::to_string);
1618
1619 let mut paths = BTreeSet::new();
1620 for path in &query.paths {
1621 paths.insert(PathBuf::from(history_path(path)?));
1622 }
1623
1624 let mut symbols = BTreeSet::new();
1625 for symbol in &query.symbols {
1626 let symbol = symbol.trim();
1627 if !symbol.is_empty() {
1628 symbols.insert(symbol.to_string());
1629 }
1630 }
1631
1632 Ok(SimilarChangeQuery {
1633 task,
1634 paths: paths.into_iter().collect(),
1635 symbols: symbols.into_iter().collect(),
1636 })
1637}
1638
/// Number of recent commits to scan for a similar-change query.
///
/// Scans 80 commits per requested hit, but never fewer than 500 and never
/// more than 5,000.
fn similar_history_scan_limit(limit: usize) -> i64 {
    const COMMITS_PER_HIT: usize = 80;
    const MIN_SCAN: usize = 500;
    const MAX_SCAN: usize = 5_000;

    let wanted = limit.saturating_mul(COMMITS_PER_HIT);
    // The clamp keeps the value within 500..=5_000, so the cast cannot overflow.
    wanted.clamp(MIN_SCAN, MAX_SCAN) as i64
}
1645
1646fn load_similarity_commits(conn: &Connection, scan_limit: i64) -> Result<Vec<GitCommitRecord>> {
1647 let mut stmt = conn
1648 .prepare("SELECT json FROM git_commits ORDER BY committed_at DESC, id LIMIT ?1")
1649 .map_err(storage_err)?;
1650 let rows = stmt
1651 .query_map(params![scan_limit], |row| row.get::<_, String>(0))
1652 .map_err(storage_err)?;
1653 collect_json(rows)
1654}
1655
1656fn load_similarity_file_touches(conn: &Connection, scan_limit: i64) -> Result<Vec<GitFileTouch>> {
1657 let mut stmt = conn
1658 .prepare(
1659 "SELECT t.json
1660 FROM git_file_touches t
1661 JOIN (
1662 SELECT id FROM git_commits ORDER BY committed_at DESC, id LIMIT ?1
1663 ) recent ON recent.id = t.commit_id
1664 ORDER BY t.touched_at DESC, t.id",
1665 )
1666 .map_err(storage_err)?;
1667 let rows = stmt
1668 .query_map(params![scan_limit], |row| row.get::<_, String>(0))
1669 .map_err(storage_err)?;
1670 collect_json(rows)
1671}
1672
1673fn load_similarity_symbol_touches(
1674 conn: &Connection,
1675 scan_limit: i64,
1676) -> Result<Vec<GitSymbolTouch>> {
1677 let mut stmt = conn
1678 .prepare(
1679 "SELECT t.json
1680 FROM git_symbol_touches t
1681 JOIN (
1682 SELECT id FROM git_commits ORDER BY committed_at DESC, id LIMIT ?1
1683 ) recent ON recent.id = t.commit_id
1684 ORDER BY t.touched_at DESC, t.id",
1685 )
1686 .map_err(storage_err)?;
1687 let rows = stmt
1688 .query_map(params![scan_limit], |row| row.get::<_, String>(0))
1689 .map_err(storage_err)?;
1690 collect_json(rows)
1691}
1692
1693fn load_similarity_cochange_edges(conn: &Connection) -> Result<Vec<GitCochangeEdge>> {
1694 let mut stmt = conn
1695 .prepare(
1696 "SELECT json FROM git_cochange_edges
1697 ORDER BY recency_weight DESC, commit_count DESC, path, cochanged_path
1698 LIMIT 5000",
1699 )
1700 .map_err(storage_err)?;
1701 let rows = stmt
1702 .query_map([], |row| row.get::<_, String>(0))
1703 .map_err(storage_err)?;
1704 collect_json(rows)
1705}
1706
1707fn load_similarity_file_hotspots(conn: &Connection) -> Result<BTreeMap<String, ChurnSummary>> {
1708 let mut stmt = conn
1709 .prepare(
1710 "SELECT entity_key, json FROM history_hotspots
1711 WHERE entity_kind = 'file'
1712 ORDER BY hotspot_score DESC, touch_count DESC, entity_key
1713 LIMIT 5000",
1714 )
1715 .map_err(storage_err)?;
1716 let rows = stmt
1717 .query_map([], |row| {
1718 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1719 })
1720 .map_err(storage_err)?;
1721 let mut out = BTreeMap::new();
1722 for row in rows {
1723 let (key, json) = row.map_err(storage_err)?;
1724 out.insert(key, serde_json::from_str(&json)?);
1725 }
1726 Ok(out)
1727}
1728
1729#[allow(clippy::too_many_arguments)]
1730fn score_similar_commit(
1731 query: &SimilarChangeQuery,
1732 task_tokens: &BTreeSet<String>,
1733 query_paths: &BTreeSet<String>,
1734 symbol_queries: &BTreeSet<String>,
1735 query_neighbors: &BTreeMap<String, Vec<GitCochangeEdge>>,
1736 query_related_paths: &BTreeSet<String>,
1737 sample_edges_by_commit: &BTreeMap<String, Vec<GitCochangeEdge>>,
1738 hotspots: &BTreeMap<String, ChurnSummary>,
1739 commit: &GitCommitRecord,
1740 file_touches: &[GitFileTouch],
1741 symbol_touches: &[GitSymbolTouch],
1742) -> Result<SimilarChangeHit> {
1743 let mut score = 0.0_f32;
1744 let mut evidence = Vec::new();
1745 let mut source_types = BTreeSet::new();
1746 let mut touched_paths = BTreeSet::new();
1747 let mut touched_symbols = BTreeSet::new();
1748 let mut cochange_paths = BTreeSet::new();
1749 let mut max_hotspot_score = 0.0_f32;
1750
1751 for touch in file_touches {
1752 touched_paths.insert(touch.path.clone());
1753 if let Some(previous_path) = &touch.previous_path {
1754 touched_paths.insert(previous_path.clone());
1755 }
1756 }
1757 for touch in symbol_touches {
1758 touched_symbols.insert(touch.qualified_name.clone());
1759 }
1760
1761 if let Some(task) = &query.task {
1762 let commit_tokens =
1763 tokenize_similarity_text(&format!("{} {}", commit.summary, commit.message));
1764 let overlaps = task_tokens
1765 .intersection(&commit_tokens)
1766 .cloned()
1767 .collect::<Vec<_>>();
1768 if !overlaps.is_empty() {
1769 let contribution = (overlaps.len() as f32 * 0.08).min(0.32);
1770 let task_score = round_similarity_score(contribution * 0.75);
1771 let metadata_score = round_similarity_score(contribution * 0.25);
1772 evidence.push(SimilarityEvidence {
1773 source_type: SimilarityEvidenceSource::TaskText,
1774 score: task_score,
1775 message: format!(
1776 "task text matched commit summary/message token(s): {}",
1777 overlaps.join(", ")
1778 ),
1779 query: Some(task.clone()),
1780 path: None,
1781 symbol: None,
1782 commit_id: Some(commit.id.clone()),
1783 });
1784 evidence.push(SimilarityEvidence {
1785 source_type: SimilarityEvidenceSource::CommitMetadata,
1786 score: metadata_score,
1787 message: "commit summary and message metadata overlap the requested task".into(),
1788 query: Some(task.clone()),
1789 path: None,
1790 symbol: None,
1791 commit_id: Some(commit.id.clone()),
1792 });
1793 score += contribution;
1794 source_types.insert(SimilarityEvidenceSource::TaskText);
1795 source_types.insert(SimilarityEvidenceSource::CommitMetadata);
1796 }
1797 }
1798
1799 let mut path_score = 0.0_f32;
1800 let mut matched_paths = BTreeSet::new();
1801 for touch in file_touches {
1802 let path = history_path(&touch.path)?;
1803 if query_paths.contains(&path) && matched_paths.insert(path.clone()) {
1804 path_score += 0.42;
1805 evidence.push(SimilarityEvidence {
1806 source_type: SimilarityEvidenceSource::Path,
1807 score: 0.42,
1808 message: "commit touched an exact query path".into(),
1809 query: Some(path.clone()),
1810 path: Some(PathBuf::from(path)),
1811 symbol: None,
1812 commit_id: Some(commit.id.clone()),
1813 });
1814 }
1815 if let Some(previous_path) = &touch.previous_path {
1816 let previous_path = history_path(previous_path)?;
1817 if query_paths.contains(&previous_path) && matched_paths.insert(previous_path.clone()) {
1818 path_score += 0.32;
1819 evidence.push(SimilarityEvidence {
1820 source_type: SimilarityEvidenceSource::Path,
1821 score: 0.32,
1822 message: "commit touched a previous name for a query path".into(),
1823 query: Some(previous_path.clone()),
1824 path: Some(PathBuf::from(previous_path)),
1825 symbol: None,
1826 commit_id: Some(commit.id.clone()),
1827 });
1828 }
1829 }
1830 }
1831 if path_score > 0.0 {
1832 score += path_score.min(0.50);
1833 source_types.insert(SimilarityEvidenceSource::Path);
1834 }
1835
1836 let mut symbol_score = 0.0_f32;
1837 let mut matched_symbols = BTreeSet::new();
1838 for touch in symbol_touches {
1839 for query_symbol in symbol_queries {
1840 let Some((matched_symbol, contribution)) = similarity_symbol_match(query_symbol, touch)
1841 else {
1842 continue;
1843 };
1844 if matched_symbols.insert((query_symbol.clone(), matched_symbol.clone())) {
1845 symbol_score += contribution;
1846 evidence.push(SimilarityEvidence {
1847 source_type: SimilarityEvidenceSource::Symbol,
1848 score: contribution,
1849 message: "commit touched a symbol matching the query".into(),
1850 query: Some(query_symbol.clone()),
1851 path: Some(touch.file_path.clone()),
1852 symbol: Some(matched_symbol),
1853 commit_id: Some(commit.id.clone()),
1854 });
1855 }
1856 }
1857 }
1858 if symbol_score > 0.0 {
1859 score += symbol_score.min(0.45);
1860 source_types.insert(SimilarityEvidenceSource::Symbol);
1861 }
1862
1863 let mut cochange_score = 0.0_f32;
1864 let mut matched_cochanges = BTreeSet::new();
1865 for touch in file_touches {
1866 let path = history_path(&touch.path)?;
1867 if let Some(edges) = query_neighbors.get(&path) {
1868 for edge in edges {
1869 let edge_path = history_path(&edge.path)?;
1870 let edge_cochanged = history_path(&edge.cochanged_path)?;
1871 let neighbor = if query_paths.contains(&edge_path) {
1872 edge_cochanged
1873 } else {
1874 edge_path
1875 };
1876 if matched_cochanges.insert(neighbor.clone()) {
1877 let contribution = (0.16 + edge.recency_weight.min(2.5) * 0.03).min(0.26);
1878 cochange_score += contribution;
1879 cochange_paths.insert(PathBuf::from(neighbor.clone()));
1880 evidence.push(SimilarityEvidence {
1881 source_type: SimilarityEvidenceSource::Cochange,
1882 score: round_similarity_score(contribution),
1883 message: "commit touched a co-change neighbor of a query path".into(),
1884 query: query_paths.iter().next().cloned(),
1885 path: Some(PathBuf::from(neighbor)),
1886 symbol: None,
1887 commit_id: Some(commit.id.clone()),
1888 });
1889 }
1890 }
1891 }
1892 }
1893 if let Some(edges) = sample_edges_by_commit.get(&commit.id.0) {
1894 for edge in edges {
1895 let sample_key = format!(
1896 "sample:{}:{}",
1897 edge.path.display(),
1898 edge.cochanged_path.display()
1899 );
1900 if matched_cochanges.insert(sample_key) {
1901 let contribution = 0.10_f32;
1902 cochange_score += contribution;
1903 cochange_paths.insert(edge.path.clone());
1904 cochange_paths.insert(edge.cochanged_path.clone());
1905 evidence.push(SimilarityEvidence {
1906 source_type: SimilarityEvidenceSource::Cochange,
1907 score: contribution,
1908 message: "commit is a persisted sample for a query path co-change edge".into(),
1909 query: query_paths.iter().next().cloned(),
1910 path: Some(edge.cochanged_path.clone()),
1911 symbol: None,
1912 commit_id: Some(commit.id.clone()),
1913 });
1914 }
1915 }
1916 }
1917 if cochange_score > 0.0 {
1918 score += cochange_score.min(0.35);
1919 source_types.insert(SimilarityEvidenceSource::Cochange);
1920 }
1921
1922 let mut churn_score = 0.0_f32;
1923 let mut matched_hotspots = BTreeSet::new();
1924 for touch in file_touches {
1925 let path = history_path(&touch.path)?;
1926 if !query_related_paths.contains(&path) {
1927 continue;
1928 }
1929 let Some(summary) = hotspots.get(&path) else {
1930 continue;
1931 };
1932 if summary.stats.hotspot_score <= 0.0 || !matched_hotspots.insert(path.clone()) {
1933 continue;
1934 }
1935 let contribution = (summary.stats.hotspot_score.ln_1p() * 0.08).min(0.14);
1936 churn_score += contribution;
1937 max_hotspot_score = max_hotspot_score.max(summary.stats.hotspot_score);
1938 evidence.push(SimilarityEvidence {
1939 source_type: SimilarityEvidenceSource::Churn,
1940 score: round_similarity_score(contribution),
1941 message: "commit touched a query-related historical churn hotspot".into(),
1942 query: Some(path.clone()),
1943 path: Some(PathBuf::from(path)),
1944 symbol: None,
1945 commit_id: Some(commit.id.clone()),
1946 });
1947 }
1948 if churn_score > 0.0 {
1949 score += churn_score.min(0.18);
1950 source_types.insert(SimilarityEvidenceSource::Churn);
1951 }
1952
1953 let rounded_score = round_similarity_score(score.min(1.0));
1954 let confidence = similar_change_confidence(rounded_score, &source_types);
1955 let mut uncertainty = Vec::new();
1956 if source_types == BTreeSet::from([SimilarityEvidenceSource::Path]) {
1957 uncertainty.push("similarity is based only on exact path overlap".into());
1958 }
1959 if confidence == Confidence::Low {
1960 uncertainty
1961 .push("low-confidence historical similarity; inspect the commit before reuse".into());
1962 }
1963 if query.task.is_some() && !source_types.contains(&SimilarityEvidenceSource::TaskText) {
1964 uncertainty.push("task text did not match this commit's summary or message".into());
1965 }
1966 uncertainty.sort();
1967 uncertainty.dedup();
1968
1969 Ok(SimilarChangeHit {
1970 change: HistoricalChangeSummary {
1971 commit: commit.clone(),
1972 touched_paths: touched_paths.into_iter().collect(),
1973 touched_symbols: touched_symbols.into_iter().collect(),
1974 cochange_paths: cochange_paths.into_iter().collect(),
1975 churn_hotspot_score: round_similarity_score(max_hotspot_score),
1976 },
1977 score: rounded_score,
1978 confidence,
1979 evidence,
1980 uncertainty,
1981 })
1982}
1983
/// Splits free text into the set of lower-cased tokens used for task matching.
///
/// Text is lower-cased, then cut at every character that is not an ASCII
/// letter or digit. Tokens shorter than three characters and the listed stop
/// words are discarded.
fn tokenize_similarity_text(text: &str) -> BTreeSet<String> {
    const STOP_WORDS: &[&str] = &[
        "and", "are", "but", "for", "from", "into", "the", "this", "that", "with", "your", "you",
        "fix", "add", "use", "using",
    ];
    const MIN_TOKEN_LEN: usize = 3;

    let lowered: String = text.chars().flat_map(char::to_lowercase).collect();
    lowered
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|word| word.len() >= MIN_TOKEN_LEN && !STOP_WORDS.contains(word))
        .map(str::to_owned)
        .collect()
}
2008
2009fn similarity_symbol_match(query_symbol: &str, touch: &GitSymbolTouch) -> Option<(String, f32)> {
2010 let qualified = touch.qualified_name.to_lowercase();
2011 let symbol_id = touch
2012 .symbol_id
2013 .as_ref()
2014 .map(|id| id.0.to_lowercase())
2015 .unwrap_or_default();
2016 let namespace_tail = qualified.rsplit("::").next().unwrap_or(&qualified);
2017 let short_name = namespace_tail.rsplit('.').next().unwrap_or(namespace_tail);
2018 if query_symbol == qualified || query_symbol == symbol_id || query_symbol == short_name {
2019 Some((touch.qualified_name.clone(), 0.35))
2020 } else if qualified.contains(query_symbol) {
2021 Some((touch.qualified_name.clone(), 0.18))
2022 } else {
2023 None
2024 }
2025}
2026
2027fn similar_change_confidence(
2028 score: f32,
2029 source_types: &BTreeSet<SimilarityEvidenceSource>,
2030) -> Confidence {
2031 let source_count = source_types.len();
2032 if (source_count >= 4 && score >= 0.75) || (source_count >= 3 && score >= 0.55) {
2033 Confidence::High
2034 } else if source_count >= 2 && score >= 0.35 {
2035 Confidence::Medium
2036 } else {
2037 Confidence::Low
2038 }
2039}
2040
/// Rounds a similarity score to three decimal places.
fn round_similarity_score(score: f32) -> f32 {
    const SCALE: f32 = 1000.0;
    let scaled = score * SCALE;
    scaled.round() / SCALE
}
2044
2045fn lower_history_confidence(left: Confidence, right: Confidence) -> Confidence {
2046 if history_confidence_rank(left) <= history_confidence_rank(right) {
2047 left
2048 } else {
2049 right
2050 }
2051}
2052
2053fn history_confidence_rank(confidence: Confidence) -> u8 {
2054 match confidence {
2055 Confidence::Low => 0,
2056 Confidence::Medium => 1,
2057 Confidence::High => 2,
2058 Confidence::Exact => 3,
2059 }
2060}
/// Row limit that `clamp_limit` substitutes when a caller passes a limit of `0`.
const DEFAULT_GRAPH_QUERY_LIMIT: usize = 100;
/// Upper bound that `clamp_limit` applies to any non-zero caller-supplied limit.
const MAX_GRAPH_QUERY_LIMIT: usize = 1_000;
2063
/// Borrowed slices of index records handed to `insert_index_rows`, which
/// writes each slice into its own table.
struct IndexRows<'a> {
    /// Rows for the `files` table.
    files: &'a [File],
    /// Rows for the `symbols` table.
    symbols: &'a [Symbol],
    /// Rows for the `chunks` table.
    chunks: &'a [CodeChunk],
    /// Rows for the `tests` table.
    tests: &'a [TestTarget],
    /// Rows for the `imports` table.
    imports: &'a [Import],
    /// Rows for the `occurrences` table.
    occurrences: &'a [SymbolOccurrence],
    /// Rows for the `analysis_facts` table.
    analysis_facts: &'a [AnalysisFact],
}
2073
2074fn insert_index_rows(tx: &Transaction<'_>, rows: IndexRows<'_>) -> Result<()> {
2075 for file in rows.files {
2076 tx.execute(
2077 "INSERT INTO files(id, path, json) VALUES(?1, ?2, ?3)",
2078 params![
2079 &file.id.0,
2080 file.path.to_string_lossy().as_ref(),
2081 serde_json::to_string(file)?
2082 ],
2083 )
2084 .map_err(storage_err)?;
2085 }
2086 for symbol in rows.symbols {
2087 tx.execute(
2088 "INSERT INTO symbols(id, name, qualified_name, file_id, json) VALUES(?1, ?2, ?3, ?4, ?5)",
2089 params![
2090 &symbol.id.0,
2091 &symbol.name,
2092 &symbol.qualified_name,
2093 &symbol.file_id.0,
2094 serde_json::to_string(symbol)?
2095 ],
2096 )
2097 .map_err(storage_err)?;
2098 }
2099 for chunk in rows.chunks {
2100 tx.execute(
2101 "INSERT INTO chunks(id, file_id, start_line, end_line, text, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6)",
2102 params![
2103 &chunk.id,
2104 &chunk.file_id.0,
2105 chunk.range.start,
2106 chunk.range.end,
2107 &chunk.text,
2108 serde_json::to_string(chunk)?
2109 ],
2110 )
2111 .map_err(storage_err)?;
2112 }
2113 for test in rows.tests {
2114 tx.execute(
2115 "INSERT INTO tests(id, file_id, json) VALUES(?1, ?2, ?3) ON CONFLICT(id) DO UPDATE SET json = excluded.json",
2116 params![&test.id, &test.file_id.0, serde_json::to_string(test)?],
2117 )
2118 .map_err(storage_err)?;
2119 }
2120 for import in rows.imports {
2121 tx.execute(
2122 "INSERT INTO imports(id, file_id, imported, json) VALUES(?1, ?2, ?3, ?4)",
2123 params![
2124 occurrence_id(
2125 &import.file_id.0,
2126 &import.imported,
2127 import.range.as_ref().map(|range| range.start),
2128 true
2129 ),
2130 &import.file_id.0,
2131 &import.imported,
2132 serde_json::to_string(import)?
2133 ],
2134 )
2135 .map_err(storage_err)?;
2136 }
2137 for occurrence in rows.occurrences {
2138 tx.execute(
2139 "INSERT INTO occurrences(id, symbol_id, file_id, is_definition, json) VALUES(?1, ?2, ?3, ?4, ?5)",
2140 params![
2141 occurrence_id(
2142 &occurrence.file_id.0,
2143 &occurrence.symbol_id.0,
2144 occurrence.range.as_ref().map(|range| range.start),
2145 occurrence.is_definition,
2146 ),
2147 &occurrence.symbol_id.0,
2148 &occurrence.file_id.0,
2149 if occurrence.is_definition { 1 } else { 0 },
2150 serde_json::to_string(occurrence)?
2151 ],
2152 )
2153 .map_err(storage_err)?;
2154 }
2155 for fact in rows.analysis_facts {
2156 tx.execute(
2157 "INSERT INTO analysis_facts(id, file_id, source_type, target, json) VALUES(?1, ?2, ?3, ?4, ?5)",
2158 params![
2159 &fact.id,
2160 &fact.file_id.0,
2161 source_type_name(&fact.source_type),
2162 &fact.target,
2163 serde_json::to_string(fact)?
2164 ],
2165 )
2166 .map_err(storage_err)?;
2167 }
2168 Ok(())
2169}
2170
2171fn insert_graph_rows(tx: &Transaction<'_>, nodes: &[GraphNode], edges: &[GraphEdge]) -> Result<()> {
2172 for node in nodes {
2173 let evidence_available = node.file_id.is_some() || node.symbol_id.is_some();
2174 tx.execute(
2175 "INSERT INTO graph_nodes(id, label, node_type, file_id, symbol_id, evidence_available, freshness, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
2176 params![
2177 &node.id.0,
2178 &node.label,
2179 format!("{:?}", node.node_type),
2180 node.file_id.as_ref().map(|f| &f.0),
2181 node.symbol_id.as_ref().map(|s| &s.0),
2182 evidence_available,
2183 0_i64,
2184 serde_json::to_string(node)?
2185 ],
2186 )
2187 .map_err(storage_err)?;
2188 }
2189 for edge in edges {
2190 let freshness = edge.evidence.indexed_at.timestamp();
2191 tx.execute(
2192 "INSERT INTO graph_edges(id, from_id, to_id, edge_type, confidence, source_type, source_file, evidence_available, freshness, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
2193 params![
2194 &edge.id.0,
2195 &edge.from.0,
2196 &edge.to.0,
2197 format!("{:?}", edge.edge_type),
2198 format!("{:?}", edge.evidence.confidence),
2199 format!("{:?}", edge.evidence.source_type),
2200 &edge.evidence.source,
2201 true,
2202 freshness,
2203 serde_json::to_string(edge)?
2204 ],
2205 )
2206 .map_err(storage_err)?;
2207 }
2208 Ok(())
2209}
2210
2211fn clamp_limit(limit: usize) -> usize {
2212 if limit == 0 {
2213 DEFAULT_GRAPH_QUERY_LIMIT
2214 } else {
2215 limit.min(MAX_GRAPH_QUERY_LIMIT)
2216 }
2217}
2218
2219impl GraphStore for SqliteStore {
2220 fn replace_graph(&self, nodes: &[GraphNode], edges: &[GraphEdge]) -> Result<()> {
2221 let mut conn = self
2222 .connection
2223 .lock()
2224 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2225 let tx = conn.transaction().map_err(storage_err)?;
2226 tx.execute("DELETE FROM graph_edges", [])
2227 .map_err(storage_err)?;
2228 tx.execute("DELETE FROM graph_nodes", [])
2229 .map_err(storage_err)?;
2230 insert_graph_rows(&tx, nodes, edges)?;
2231 tx.commit().map_err(storage_err)?;
2232 Ok(())
2233 }
2234
2235 fn node_type_stats(
2236 &self,
2237 ) -> Result<std::collections::HashMap<String, open_kioku_storage::TypeStats>> {
2238 let conn = self
2239 .connection
2240 .lock()
2241 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2242 let mut stmt = conn
2243 .prepare("SELECT node_type, COUNT(*), MAX(evidence_available), MAX(freshness) FROM graph_nodes GROUP BY node_type")
2244 .map_err(storage_err)?;
2245 let mut rows = stmt.query([]).map_err(storage_err)?;
2246 let mut map = std::collections::HashMap::new();
2247 while let Some(row) = rows.next().map_err(storage_err)? {
2248 let t: String = row.get(0).map_err(storage_err)?;
2249 let c: i64 = row.get(1).map_err(storage_err)?;
2250 let ev: bool = row.get(2).unwrap_or(false);
2251 let fr: Option<i64> = row.get(3).unwrap_or(None);
2252 map.insert(
2253 t,
2254 open_kioku_storage::TypeStats {
2255 count: c as usize,
2256 evidence_available: ev,
2257 freshness: fr.map(|v| v as u64),
2258 },
2259 );
2260 }
2261 Ok(map)
2262 }
2263
2264 fn edge_type_stats(
2265 &self,
2266 ) -> Result<std::collections::HashMap<String, open_kioku_storage::TypeStats>> {
2267 let conn = self
2268 .connection
2269 .lock()
2270 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2271 let mut stmt = conn
2272 .prepare("SELECT edge_type, COUNT(*), MAX(evidence_available), MAX(freshness) FROM graph_edges GROUP BY edge_type")
2273 .map_err(storage_err)?;
2274 let mut rows = stmt.query([]).map_err(storage_err)?;
2275 let mut map = std::collections::HashMap::new();
2276 while let Some(row) = rows.next().map_err(storage_err)? {
2277 let t: String = row.get(0).map_err(storage_err)?;
2278 let c: i64 = row.get(1).map_err(storage_err)?;
2279 let ev: bool = row.get(2).unwrap_or(false);
2280 let fr: Option<i64> = row.get(3).unwrap_or(None);
2281 map.insert(
2282 t,
2283 open_kioku_storage::TypeStats {
2284 count: c as usize,
2285 evidence_available: ev,
2286 freshness: fr.map(|v| v as u64),
2287 },
2288 );
2289 }
2290 Ok(map)
2291 }
2292
2293 fn node_by_id(&self, id: &str) -> Result<Option<GraphNode>> {
2294 let conn = self
2295 .connection
2296 .lock()
2297 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2298 graph_node_by_id(&conn, id)
2299 }
2300
2301 fn neighbors(&self, node: &str, limit: usize) -> Result<(Vec<GraphNode>, Vec<GraphEdge>)> {
2302 let conn = self
2303 .connection
2304 .lock()
2305 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2306 let mut stmt = conn
2307 .prepare("SELECT json FROM graph_edges WHERE from_id = ?1 OR to_id = ?1 LIMIT ?2")
2308 .map_err(storage_err)?;
2309 let rows = stmt
2310 .query_map(params![node, limit as i64], |row| row.get::<_, String>(0))
2311 .map_err(storage_err)?;
2312 let edges: Vec<GraphEdge> = collect_json(rows)?;
2313 let mut ids = edges
2314 .iter()
2315 .flat_map(|edge| [edge.from.0.clone(), edge.to.0.clone()])
2316 .collect::<Vec<_>>();
2317 ids.sort();
2318 ids.dedup();
2319 let mut nodes = Vec::new();
2320 for id in ids {
2321 if let Some(node) = graph_node_by_id(&conn, &id)? {
2322 nodes.push(node);
2323 }
2324 }
2325 Ok((nodes, edges))
2326 }
2327
2328 fn shortest_path(&self, from: &str, to: &str, max_depth: usize) -> Result<Vec<GraphEdge>> {
2329 use std::collections::{HashSet, VecDeque};
2330
2331 let conn = self
2332 .connection
2333 .lock()
2334 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2335
2336 let mut edge_stmt = conn
2339 .prepare("SELECT json FROM graph_edges WHERE from_id = ?1")
2340 .map_err(storage_err)?;
2341
2342 let mut queue = VecDeque::from([(from.to_string(), Vec::<GraphEdge>::new())]);
2343 let mut seen = HashSet::new();
2344 while let Some((node, path)) = queue.pop_front() {
2345 if node == to {
2346 return Ok(path);
2347 }
2348 if path.len() >= max_depth || !seen.insert(node.clone()) {
2349 continue;
2350 }
2351 let rows = edge_stmt
2352 .query_map(params![&node], |row| row.get::<_, String>(0))
2353 .map_err(storage_err)?;
2354 let edges: Vec<GraphEdge> = collect_json(rows)?;
2355 for edge in edges {
2356 let mut next_path = path.clone();
2357 next_path.push(edge.clone());
2358 queue.push_back((edge.to.0.clone(), next_path));
2359 }
2360 }
2361 Ok(Vec::new())
2362 }
2363 fn nodes_by_type(
2364 &self,
2365 node_type: GraphNodeType,
2366 limit: usize,
2367 offset: usize,
2368 ) -> Result<Vec<GraphNode>> {
2369 let conn = self
2370 .connection
2371 .lock()
2372 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2373 let limit = clamp_limit(limit) as i64;
2374 let offset = offset as i64;
2375 let type_str = format!("{:?}", node_type);
2376 let mut stmt = conn
2377 .prepare(
2378 "SELECT json FROM graph_nodes WHERE node_type = ?1 ORDER BY id LIMIT ?2 OFFSET ?3",
2379 )
2380 .map_err(storage_err)?;
2381 let rows = stmt
2382 .query_map(params![type_str, limit, offset], |row| {
2383 row.get::<_, String>(0)
2384 })
2385 .map_err(storage_err)?;
2386 collect_json(rows)
2387 }
2388
2389 fn all_graph_nodes(&self) -> Result<Vec<GraphNode>> {
2390 let conn = self
2391 .connection
2392 .lock()
2393 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2394 let mut stmt = conn
2395 .prepare("SELECT json FROM graph_nodes ORDER BY id")
2396 .map_err(storage_err)?;
2397 let rows = stmt
2398 .query_map([], |row| row.get::<_, String>(0))
2399 .map_err(storage_err)?;
2400 collect_json(rows)
2401 }
2402
2403 fn edges_by_type(
2404 &self,
2405 edge_type: GraphEdgeType,
2406 limit: usize,
2407 offset: usize,
2408 ) -> Result<Vec<GraphEdge>> {
2409 let conn = self
2410 .connection
2411 .lock()
2412 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2413 let limit = clamp_limit(limit) as i64;
2414 let offset = offset as i64;
2415 let type_str = format!("{:?}", edge_type);
2416 let mut stmt = conn
2417 .prepare(
2418 "SELECT json FROM graph_edges WHERE edge_type = ?1 ORDER BY id LIMIT ?2 OFFSET ?3",
2419 )
2420 .map_err(storage_err)?;
2421 let rows = stmt
2422 .query_map(params![type_str, limit, offset], |row| {
2423 row.get::<_, String>(0)
2424 })
2425 .map_err(storage_err)?;
2426 collect_json(rows)
2427 }
2428
2429 fn graph_counts(&self) -> Result<GraphCounts> {
2430 let conn = self
2431 .connection
2432 .lock()
2433 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2434 let nodes: usize = conn
2435 .query_row("SELECT COUNT(*) FROM graph_nodes", [], |row| row.get(0))
2436 .map_err(storage_err)?;
2437 let edges: usize = conn
2438 .query_row("SELECT COUNT(*) FROM graph_edges", [], |row| row.get(0))
2439 .map_err(storage_err)?;
2440 Ok(GraphCounts { nodes, edges })
2441 }
2442
2443 fn graph_schema_counts(&self) -> Result<GraphSchemaCounts> {
2444 let conn = self
2445 .connection
2446 .lock()
2447 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2448
2449 let mut node_types = std::collections::BTreeMap::new();
2450 let mut stmt = conn
2451 .prepare("SELECT node_type, COUNT(*) FROM graph_nodes GROUP BY node_type")
2452 .map_err(storage_err)?;
2453 let mut rows = stmt.query([]).map_err(storage_err)?;
2454 while let Some(row) = rows.next().map_err(storage_err)? {
2455 let ntype: String = row.get(0).map_err(storage_err)?;
2456 let count: usize = row.get(1).map_err(storage_err)?;
2457 if !ntype.is_empty() {
2458 node_types.insert(ntype, count);
2459 }
2460 }
2461
2462 let mut edge_types = std::collections::BTreeMap::new();
2463 let mut stmt = conn
2464 .prepare("SELECT edge_type, COUNT(*) FROM graph_edges GROUP BY edge_type")
2465 .map_err(storage_err)?;
2466 let mut rows = stmt.query([]).map_err(storage_err)?;
2467 while let Some(row) = rows.next().map_err(storage_err)? {
2468 let etype: String = row.get(0).map_err(storage_err)?;
2469 let count: usize = row.get(1).map_err(storage_err)?;
2470 if !etype.is_empty() {
2471 edge_types.insert(etype, count);
2472 }
2473 }
2474
2475 Ok(GraphSchemaCounts {
2476 node_types,
2477 edge_types,
2478 })
2479 }
2480
2481 fn graph_edges_between(&self, from: &str, to: &str, limit: usize) -> Result<Vec<GraphEdge>> {
2482 let conn = self
2483 .connection
2484 .lock()
2485 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
2486 let limit = clamp_limit(limit) as i64;
2487 let mut stmt = conn
2488 .prepare("SELECT json FROM graph_edges WHERE from_id = ?1 AND to_id = ?2 ORDER BY id LIMIT ?3")
2489 .map_err(storage_err)?;
2490 let rows = stmt
2491 .query_map(params![from, to, limit], |row| row.get::<_, String>(0))
2492 .map_err(storage_err)?;
2493 collect_json(rows)
2494 }
2495}
2496
2497fn is_duplicate_column(err: &rusqlite::Error) -> bool {
2498 if let rusqlite::Error::SqliteFailure(_, Some(msg)) = err {
2499 msg.contains("duplicate column name")
2500 } else {
2501 false
2502 }
2503}
2504
2505fn add_column_if_not_exists(conn: &mut Connection, stmt: &str) -> Result<()> {
2506 match conn.execute(stmt, []) {
2507 Ok(_) => Ok(()),
2508 Err(err) if is_duplicate_column(&err) => Ok(()),
2509 Err(err) => Err(storage_err(err)),
2510 }
2511}
2512
2513fn migrate_graph_schema(conn: &mut Connection) -> Result<()> {
2514 add_column_if_not_exists(
2516 conn,
2517 "ALTER TABLE graph_nodes ADD COLUMN node_type TEXT DEFAULT ''",
2518 )?;
2519 add_column_if_not_exists(
2520 conn,
2521 "ALTER TABLE graph_nodes ADD COLUMN file_id TEXT DEFAULT ''",
2522 )?;
2523 add_column_if_not_exists(
2524 conn,
2525 "ALTER TABLE graph_nodes ADD COLUMN symbol_id TEXT DEFAULT ''",
2526 )?;
2527 add_column_if_not_exists(
2528 conn,
2529 "ALTER TABLE graph_nodes ADD COLUMN evidence_available BOOLEAN DEFAULT 0",
2530 )?;
2531 add_column_if_not_exists(
2532 conn,
2533 "ALTER TABLE graph_nodes ADD COLUMN freshness INTEGER DEFAULT 0",
2534 )?;
2535
2536 add_column_if_not_exists(
2538 conn,
2539 "ALTER TABLE graph_edges ADD COLUMN confidence TEXT DEFAULT ''",
2540 )?;
2541 add_column_if_not_exists(
2542 conn,
2543 "ALTER TABLE graph_edges ADD COLUMN source_type TEXT DEFAULT ''",
2544 )?;
2545 add_column_if_not_exists(
2546 conn,
2547 "ALTER TABLE graph_edges ADD COLUMN source_file TEXT DEFAULT ''",
2548 )?;
2549 add_column_if_not_exists(
2550 conn,
2551 "ALTER TABLE graph_edges ADD COLUMN evidence_available BOOLEAN DEFAULT 0",
2552 )?;
2553 add_column_if_not_exists(
2554 conn,
2555 "ALTER TABLE graph_edges ADD COLUMN freshness INTEGER DEFAULT 0",
2556 )?;
2557
2558 backfill_graph_query_columns(conn)?;
2559
2560 conn.execute(
2562 "CREATE INDEX IF NOT EXISTS idx_graph_nodes_type ON graph_nodes(node_type)",
2563 [],
2564 )
2565 .map_err(storage_err)?;
2566 conn.execute(
2567 "CREATE INDEX IF NOT EXISTS idx_graph_nodes_file ON graph_nodes(file_id)",
2568 [],
2569 )
2570 .map_err(storage_err)?;
2571 conn.execute(
2572 "CREATE INDEX IF NOT EXISTS idx_graph_nodes_symbol ON graph_nodes(symbol_id)",
2573 [],
2574 )
2575 .map_err(storage_err)?;
2576 conn.execute(
2577 "CREATE INDEX IF NOT EXISTS idx_graph_edges_type ON graph_edges(edge_type)",
2578 [],
2579 )
2580 .map_err(storage_err)?;
2581 conn.execute(
2582 "CREATE INDEX IF NOT EXISTS idx_graph_edges_from_type ON graph_edges(from_id, edge_type)",
2583 [],
2584 )
2585 .map_err(storage_err)?;
2586 conn.execute(
2587 "CREATE INDEX IF NOT EXISTS idx_graph_edges_to_type ON graph_edges(to_id, edge_type)",
2588 [],
2589 )
2590 .map_err(storage_err)?;
2591 conn.execute(
2592 "CREATE INDEX IF NOT EXISTS idx_graph_edges_source_type ON graph_edges(source_type)",
2593 [],
2594 )
2595 .map_err(storage_err)?;
2596
2597 let version: i64 = conn
2598 .pragma_query_value(None, "user_version", |row| row.get(0))
2599 .map_err(storage_err)?;
2600 if version < SQLITE_GRAPH_SCHEMA_VERSION {
2601 conn.pragma_update(None, "user_version", SQLITE_GRAPH_SCHEMA_VERSION)
2602 .map_err(storage_err)?;
2603 }
2604
2605 Ok(())
2606}
2607
2608fn backfill_graph_query_columns(conn: &mut Connection) -> Result<()> {
2609 let node_rows = {
2610 let mut stmt = conn
2611 .prepare(
2612 "SELECT id, json FROM graph_nodes
2613 WHERE COALESCE(node_type, '') = ''
2614 OR COALESCE(file_id, '') = ''
2615 OR COALESCE(symbol_id, '') = ''",
2616 )
2617 .map_err(storage_err)?;
2618 let rows = stmt
2619 .query_map([], |row| {
2620 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2621 })
2622 .map_err(storage_err)?;
2623 let mut rows_out = Vec::new();
2624 for row in rows {
2625 rows_out.push(row.map_err(storage_err)?);
2626 }
2627 rows_out
2628 };
2629 if !node_rows.is_empty() {
2630 let tx = conn.transaction().map_err(storage_err)?;
2631 for (id, json) in node_rows {
2632 let Ok(node) = serde_json::from_str::<GraphNode>(&json) else {
2633 continue;
2634 };
2635 tx.execute(
2636 "UPDATE graph_nodes
2637 SET node_type = ?1,
2638 file_id = ?2,
2639 symbol_id = ?3,
2640 evidence_available = ?4
2641 WHERE id = ?5",
2642 params![
2643 format!("{:?}", node.node_type),
2644 node.file_id.as_ref().map(|file_id| file_id.0.as_str()),
2645 node.symbol_id
2646 .as_ref()
2647 .map(|symbol_id| symbol_id.0.as_str()),
2648 node.file_id.is_some() || node.symbol_id.is_some(),
2649 id,
2650 ],
2651 )
2652 .map_err(storage_err)?;
2653 }
2654 tx.commit().map_err(storage_err)?;
2655 }
2656
2657 let edge_rows = {
2658 let mut stmt = conn
2659 .prepare(
2660 "SELECT id, json FROM graph_edges
2661 WHERE COALESCE(edge_type, '') = ''
2662 OR COALESCE(confidence, '') = ''
2663 OR COALESCE(source_type, '') = ''
2664 OR COALESCE(source_file, '') = ''",
2665 )
2666 .map_err(storage_err)?;
2667 let rows = stmt
2668 .query_map([], |row| {
2669 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2670 })
2671 .map_err(storage_err)?;
2672 let mut rows_out = Vec::new();
2673 for row in rows {
2674 rows_out.push(row.map_err(storage_err)?);
2675 }
2676 rows_out
2677 };
2678 if !edge_rows.is_empty() {
2679 let tx = conn.transaction().map_err(storage_err)?;
2680 for (id, json) in edge_rows {
2681 let Ok(edge) = serde_json::from_str::<GraphEdge>(&json) else {
2682 continue;
2683 };
2684 tx.execute(
2685 "UPDATE graph_edges
2686 SET from_id = ?1,
2687 to_id = ?2,
2688 edge_type = ?3,
2689 confidence = ?4,
2690 source_type = ?5,
2691 source_file = ?6,
2692 evidence_available = ?7,
2693 freshness = ?8
2694 WHERE id = ?9",
2695 params![
2696 edge.from.0.as_str(),
2697 edge.to.0.as_str(),
2698 format!("{:?}", edge.edge_type),
2699 format!("{:?}", edge.evidence.confidence),
2700 format!("{:?}", edge.evidence.source_type),
2701 edge.evidence.source.as_str(),
2702 true,
2703 edge.evidence.indexed_at.timestamp(),
2704 id,
2705 ],
2706 )
2707 .map_err(storage_err)?;
2708 }
2709 tx.commit().map_err(storage_err)?;
2710 }
2711
2712 Ok(())
2713}
2714
2715fn migrate_history_schema(conn: &mut Connection) -> Result<()> {
2716 ensure_supported_sqlite_schema(conn)?;
2717 let version: i64 = conn
2718 .pragma_query_value(None, "user_version", |row| row.get(0))
2719 .map_err(storage_err)?;
2720 let tx = conn.transaction().map_err(storage_err)?;
2721 tx.execute_batch(HISTORY_SCHEMA_V1).map_err(storage_err)?;
2722 if version < SQLITE_HISTORY_SCHEMA_VERSION {
2723 tx.pragma_update(None, "user_version", SQLITE_HISTORY_SCHEMA_VERSION)
2724 .map_err(storage_err)?;
2725 }
2726 tx.commit().map_err(storage_err)?;
2727 Ok(())
2728}
2729
2730fn ensure_supported_sqlite_schema(conn: &Connection) -> Result<()> {
2731 let version: i64 = conn
2732 .pragma_query_value(None, "user_version", |row| row.get(0))
2733 .map_err(storage_err)?;
2734 if version > SQLITE_SUPPORTED_SCHEMA_VERSION {
2735 return Err(OkError::Storage(format!(
2736 "sqlite schema version {version} is newer than supported version {SQLITE_SUPPORTED_SCHEMA_VERSION}"
2737 )));
2738 }
2739 Ok(())
2740}
2741
2742fn validate_history_snapshot(snapshot: &HistorySnapshot) -> Result<()> {
2743 if snapshot.schema_version != HISTORY_SCHEMA_VERSION {
2744 return Err(OkError::Storage(format!(
2745 "unsupported history snapshot schema version {}; expected {}",
2746 snapshot.schema_version, HISTORY_SCHEMA_VERSION
2747 )));
2748 }
2749
2750 let mut commit_ids = BTreeSet::new();
2751 for commit in &snapshot.commits {
2752 validate_text("commit id", &commit.id.0)?;
2753 if !commit_ids.insert(commit.id.0.clone()) {
2754 return Err(OkError::Storage(format!(
2755 "duplicate history commit id `{}`",
2756 commit.id
2757 )));
2758 }
2759 validate_text("commit author name", &commit.author.name)?;
2760 if let Some(committer) = &commit.committer {
2761 validate_text("commit committer name", &committer.name)?;
2762 }
2763 let mut parent_ids = BTreeSet::new();
2764 for parent_id in &commit.parent_ids {
2765 validate_text("parent commit id", &parent_id.0)?;
2766 if !parent_ids.insert(parent_id.0.as_str()) {
2767 return Err(OkError::Storage(format!(
2768 "commit `{}` contains duplicate parent `{parent_id}`",
2769 commit.id
2770 )));
2771 }
2772 }
2773 }
2774
2775 let mut file_touch_ids = BTreeSet::new();
2776 for touch in &snapshot.file_touches {
2777 validate_history_record_id(&touch.id, "file touch", &mut file_touch_ids)?;
2778 validate_commit_reference(&touch.commit_id, &commit_ids, "file touch")?;
2779 history_path(&touch.path)?;
2780 if let Some(previous_path) = &touch.previous_path {
2781 history_path(previous_path)?;
2782 }
2783 }
2784
2785 let mut symbol_touch_ids = BTreeSet::new();
2786 for touch in &snapshot.symbol_touches {
2787 validate_history_record_id(&touch.id, "symbol touch", &mut symbol_touch_ids)?;
2788 validate_commit_reference(&touch.commit_id, &commit_ids, "symbol touch")?;
2789 validate_text("symbol qualified name", &touch.qualified_name)?;
2790 history_path(&touch.file_path)?;
2791 }
2792
2793 let mut cochange_ids = BTreeSet::new();
2794 let mut cochange_pairs = BTreeSet::new();
2795 for edge in &snapshot.cochange_edges {
2796 validate_history_record_id(&edge.id, "co-change edge", &mut cochange_ids)?;
2797 let path = history_path(&edge.path)?;
2798 let cochanged_path = history_path(&edge.cochanged_path)?;
2799 if path == cochanged_path {
2800 return Err(OkError::Storage(format!(
2801 "co-change edge `{}` must connect two different paths",
2802 edge.id
2803 )));
2804 }
2805 if !cochange_pairs.insert((path.clone(), cochanged_path.clone())) {
2806 return Err(OkError::Storage(format!(
2807 "duplicate co-change edge `{path}` -> `{cochanged_path}`"
2808 )));
2809 }
2810 if edge.commit_count == 0 {
2811 return Err(OkError::Storage(format!(
2812 "co-change edge `{}` must have a positive commit count",
2813 edge.id
2814 )));
2815 }
2816 if !edge.recency_weight.is_finite() || edge.recency_weight < 0.0 {
2817 return Err(OkError::Storage(format!(
2818 "co-change edge `{}` has invalid recency weight {}",
2819 edge.id, edge.recency_weight
2820 )));
2821 }
2822 let mut sample_commits = BTreeSet::new();
2823 for commit_id in &edge.sample_commits {
2824 validate_text("sample commit id", &commit_id.0)?;
2825 if !sample_commits.insert(commit_id.0.as_str()) {
2826 return Err(OkError::Storage(format!(
2827 "co-change edge `{}` contains duplicate sample commit `{commit_id}`",
2828 edge.id
2829 )));
2830 }
2831 }
2832 }
2833
2834 let mut reviewer_ids = BTreeSet::new();
2835 for evidence in &snapshot.reviewer_evidence {
2836 validate_history_record_id(&evidence.id, "review event", &mut reviewer_ids)?;
2837 validate_text("reviewer name", &evidence.reviewer.name)?;
2838 validate_text("review evidence source", &evidence.source)?;
2839 if let Some(commit_id) = &evidence.commit_id {
2840 validate_text("review commit id", &commit_id.0)?;
2841 }
2842 if let Some(path) = &evidence.path {
2843 history_path(path)?;
2844 }
2845 }
2846
2847 Ok(())
2848}
2849
2850fn validate_history_record_id(
2851 id: &HistoryRecordId,
2852 kind: &str,
2853 ids: &mut BTreeSet<String>,
2854) -> Result<()> {
2855 validate_text(&format!("{kind} id"), &id.0)?;
2856 if !ids.insert(id.0.clone()) {
2857 return Err(OkError::Storage(format!("duplicate {kind} id `{id}`")));
2858 }
2859 Ok(())
2860}
2861
2862fn validate_commit_reference(
2863 commit_id: &GitCommitId,
2864 commit_ids: &BTreeSet<String>,
2865 kind: &str,
2866) -> Result<()> {
2867 validate_text("commit id", &commit_id.0)?;
2868 if !commit_ids.contains(&commit_id.0) {
2869 return Err(OkError::Storage(format!(
2870 "{kind} references missing commit `{commit_id}`"
2871 )));
2872 }
2873 Ok(())
2874}
2875
2876fn validate_text(field: &str, value: &str) -> Result<()> {
2877 if value.trim().is_empty() {
2878 return Err(OkError::Storage(format!("{field} must not be empty")));
2879 }
2880 Ok(())
2881}
2882
2883fn history_path(path: &Path) -> Result<String> {
2884 if path.as_os_str().is_empty()
2885 || path.is_absolute()
2886 || path
2887 .components()
2888 .any(|component| !matches!(component, std::path::Component::Normal(_)))
2889 {
2890 return Err(OkError::Storage(format!(
2891 "history path must be a normalized repository-relative path: {}",
2892 path.display()
2893 )));
2894 }
2895 let value = path.to_str().ok_or_else(|| {
2896 OkError::Storage(format!(
2897 "history path must be valid UTF-8: {}",
2898 path.display()
2899 ))
2900 })?;
2901 if value.contains('\\') {
2902 return Err(OkError::Storage(format!(
2903 "history path must use `/` separators: {}",
2904 path.display()
2905 )));
2906 }
2907 Ok(value.to_string())
2908}
2909
/// One touch event used as input when computing churn statistics.
///
/// `materialize_churn_summaries` builds these from both file touches and
/// symbol touches.
#[derive(Debug, Clone)]
struct ChurnTouchSample {
    /// Id of the touch record this sample was built from.
    id: String,
    /// Timestamp copied from the touch record.
    touched_at: DateTime<Utc>,
    /// Added-line count; `0` when the file touch has none recorded, and always
    /// `0` for samples built from symbol touches.
    additions: u32,
    /// Deleted-line count; `0` when the file touch has none recorded, and
    /// always `0` for samples built from symbol touches.
    deletions: u32,
    /// `Confidence::Exact` for file touches; copied from the touch for symbol
    /// touches.
    confidence: Confidence,
    /// Empty for file touches; copied from the touch for symbol touches.
    uncertainty: Vec<String>,
}
2919
2920fn materialize_churn_summaries(snapshot: &HistorySnapshot) -> Result<Vec<ChurnSummary>> {
2921 let Some(reference_at) = newest_history_touch(snapshot) else {
2922 return Ok(Vec::new());
2923 };
2924
2925 let mut file_samples = BTreeMap::<String, Vec<ChurnTouchSample>>::new();
2926 let mut file_aliases = Vec::<(String, String)>::new();
2927 let mut module_samples = BTreeMap::<String, BTreeMap<String, ChurnTouchSample>>::new();
2928 let mut symbol_samples = BTreeMap::<String, SymbolChurnAccumulator>::new();
2929
2930 for touch in &snapshot.file_touches {
2931 let path = history_path(&touch.path)?;
2932 let sample = ChurnTouchSample {
2933 id: touch.id.0.clone(),
2934 touched_at: touch.touched_at,
2935 additions: touch.additions.unwrap_or_default(),
2936 deletions: touch.deletions.unwrap_or_default(),
2937 confidence: Confidence::Exact,
2938 uncertainty: Vec::new(),
2939 };
2940 file_samples
2941 .entry(path.clone())
2942 .or_default()
2943 .push(sample.clone());
2944 if let Some(previous_path) = &touch.previous_path {
2945 file_aliases.push((path, history_path(previous_path)?));
2946 }
2947 }
2948 let file_samples = expand_file_churn_aliases(file_samples, file_aliases);
2949 for (path, samples) in &file_samples {
2950 for module in churn_modules_for_path(Path::new(path)) {
2951 let module_entry = module_samples.entry(module).or_default();
2952 for sample in samples {
2953 module_entry.insert(sample.id.clone(), sample.clone());
2954 }
2955 }
2956 }
2957
2958 for touch in &snapshot.symbol_touches {
2959 let Some(symbol_id) = &touch.symbol_id else {
2960 continue;
2961 };
2962 let file_path = history_path(&touch.file_path)?;
2963 let entry = symbol_samples
2964 .entry(symbol_id.0.clone())
2965 .or_insert_with(|| SymbolChurnAccumulator {
2966 file_path: PathBuf::from(&file_path),
2967 symbol_id: symbol_id.clone(),
2968 qualified_name: touch.qualified_name.clone(),
2969 samples: Vec::new(),
2970 saw_uncertainty: false,
2971 });
2972 entry.samples.push(ChurnTouchSample {
2973 id: touch.id.0.clone(),
2974 touched_at: touch.touched_at,
2975 additions: 0,
2976 deletions: 0,
2977 confidence: touch.confidence,
2978 uncertainty: touch.uncertainty.clone(),
2979 });
2980 if !touch.uncertainty.is_empty() {
2981 entry.saw_uncertainty = true;
2982 }
2983 }
2984
2985 let mut summaries = Vec::new();
2986 for (path, samples) in file_samples {
2987 summaries.push(ChurnSummary {
2988 entity_kind: ChurnEntityKind::File,
2989 key: path.clone(),
2990 path: Some(PathBuf::from(path)),
2991 symbol_id: None,
2992 qualified_name: None,
2993 generated_at: reference_at,
2994 stats: churn_stats(&samples, reference_at),
2995 confidence: Confidence::Exact,
2996 uncertainty: Vec::new(),
2997 });
2998 }
2999 for (module, samples) in module_samples {
3000 let samples = samples.into_values().collect::<Vec<_>>();
3001 summaries.push(ChurnSummary {
3002 entity_kind: ChurnEntityKind::Module,
3003 key: module.clone(),
3004 path: Some(PathBuf::from(module)),
3005 symbol_id: None,
3006 qualified_name: None,
3007 generated_at: reference_at,
3008 stats: churn_stats(&samples, reference_at),
3009 confidence: Confidence::Medium,
3010 uncertainty: vec![
3011 "module churn is aggregated from persisted file touches in this directory tree"
3012 .into(),
3013 ],
3014 });
3015 }
3016 for (key, entry) in symbol_samples {
3017 let mut uncertainty = entry
3018 .samples
3019 .iter()
3020 .flat_map(|sample| sample.uncertainty.iter().cloned())
3021 .collect::<BTreeSet<_>>()
3022 .into_iter()
3023 .collect::<Vec<_>>();
3024 if entry.saw_uncertainty {
3025 uncertainty.push("symbol churn inherits uncertainty from line-level history".into());
3026 }
3027 summaries.push(ChurnSummary {
3028 entity_kind: ChurnEntityKind::Symbol,
3029 key,
3030 path: Some(entry.file_path),
3031 symbol_id: Some(entry.symbol_id),
3032 qualified_name: Some(entry.qualified_name),
3033 generated_at: reference_at,
3034 stats: churn_stats(&entry.samples, reference_at),
3035 confidence: minimum_churn_confidence(&entry.samples),
3036 uncertainty,
3037 });
3038 }
3039
3040 summaries.sort_by(|left, right| {
3041 left.entity_kind
3042 .cmp(&right.entity_kind)
3043 .then_with(|| {
3044 right
3045 .stats
3046 .hotspot_score
3047 .total_cmp(&left.stats.hotspot_score)
3048 })
3049 .then_with(|| right.stats.touch_count.cmp(&left.stats.touch_count))
3050 .then_with(|| left.key.cmp(&right.key))
3051 });
3052 Ok(summaries)
3053}
3054
/// Collects the touches of one symbol while `materialize_churn_summaries`
/// walks the snapshot's symbol touches.
#[derive(Debug, Clone)]
struct SymbolChurnAccumulator {
    /// File path taken from the first touch seen for this symbol.
    file_path: PathBuf,
    /// Id of the symbol being accumulated.
    symbol_id: SymbolId,
    /// Qualified name taken from the first touch seen for this symbol.
    qualified_name: String,
    /// One sample per symbol touch, in snapshot order.
    samples: Vec<ChurnTouchSample>,
    /// Set once any touch of this symbol carried a non-empty uncertainty list.
    saw_uncertainty: bool,
}
3063
3064fn newest_history_touch(snapshot: &HistorySnapshot) -> Option<DateTime<Utc>> {
3065 snapshot
3066 .file_touches
3067 .iter()
3068 .map(|touch| touch.touched_at)
3069 .chain(snapshot.symbol_touches.iter().map(|touch| touch.touched_at))
3070 .max()
3071}
3072
/// Lists the module keys that enclose `path`, nearest directory first.
///
/// Every ancestor directory contributes its lossily converted path string. An
/// empty ancestor (the relative root) is reported as `"__root__"`. A path that
/// has no parent at all still yields a single `"__root__"` entry.
fn churn_modules_for_path(path: &Path) -> Vec<String> {
    const ROOT_KEY: &str = "__root__";

    // `ancestors()` yields `path` itself first; only its directories are modules.
    let mut modules: Vec<String> = path
        .ancestors()
        .skip(1)
        .map(|directory| {
            if directory.as_os_str().is_empty() {
                ROOT_KEY.to_string()
            } else {
                directory.to_string_lossy().into_owned()
            }
        })
        .collect();
    if modules.is_empty() {
        modules.push(ROOT_KEY.to_string());
    }
    modules
}
3090
3091fn expand_file_churn_aliases(
3092 samples: BTreeMap<String, Vec<ChurnTouchSample>>,
3093 aliases: Vec<(String, String)>,
3094) -> BTreeMap<String, Vec<ChurnTouchSample>> {
3095 if aliases.is_empty() {
3096 return samples;
3097 }
3098
3099 let mut groups = samples
3100 .keys()
3101 .map(|path| BTreeSet::from([path.clone()]))
3102 .collect::<Vec<_>>();
3103 for (path, previous_path) in aliases {
3104 merge_file_alias_group(&mut groups, path, previous_path);
3105 }
3106
3107 let mut expanded = BTreeMap::new();
3108 for group in groups {
3109 let mut combined = Vec::new();
3110 for path in &group {
3111 if let Some(path_samples) = samples.get(path) {
3112 combined.extend(path_samples.clone());
3113 }
3114 }
3115 if combined.is_empty() {
3116 continue;
3117 }
3118 for path in group {
3119 expanded.insert(path, combined.clone());
3120 }
3121 }
3122 expanded
3123}
3124
/// Records that `path` and `previous_path` name the same file.
///
/// `groups` holds sets of aliased paths. If both paths already sit in different
/// groups, the group with the higher index is folded into the lower one. If
/// only one path is known, the other joins its group. If neither is known, a
/// new group holding both is appended.
fn merge_file_alias_group(groups: &mut Vec<BTreeSet<String>>, path: String, previous_path: String) {
    /// Index of the first group that contains `needle`.
    fn group_index(groups: &[BTreeSet<String>], needle: &str) -> Option<usize> {
        groups.iter().position(|members| members.contains(needle))
    }

    let path_group = group_index(groups, &path);
    let previous_group = group_index(groups, &previous_path);
    match (path_group, previous_group) {
        (Some(first), Some(second)) => {
            if first != second {
                // Removing the higher index leaves the lower index valid.
                let absorbed = groups.remove(first.max(second));
                groups[first.min(second)].extend(absorbed);
            }
        }
        (Some(known), None) => {
            groups[known].insert(previous_path);
        }
        (None, Some(known)) => {
            groups[known].insert(path);
        }
        (None, None) => groups.push(BTreeSet::from([path, previous_path])),
    }
}
3152
3153fn churn_stats(samples: &[ChurnTouchSample], reference_at: DateTime<Utc>) -> ChurnStats {
3154 let mut last_30d = 0;
3155 let mut last_90d = 0;
3156 let mut recency_weighted = 0.0_f32;
3157 let mut churn_volume = 0_u64;
3158
3159 for sample in samples {
3160 let age_seconds = reference_at
3161 .signed_duration_since(sample.touched_at)
3162 .num_seconds()
3163 .max(0) as f32;
3164 let age_days = age_seconds / 86_400.0;
3165 if age_days <= 30.0 {
3166 last_30d += 1;
3167 }
3168 if age_days <= 90.0 {
3169 last_90d += 1;
3170 }
3171 recency_weighted += 1.0 / (1.0 + age_days / 30.0);
3172 churn_volume += u64::from(sample.additions) + u64::from(sample.deletions);
3173 }
3174
3175 let touch_count = samples.len();
3176 let hotspot_score =
3177 recency_weighted * (touch_count as f32).ln_1p() + (churn_volume as f32).ln_1p() / 10.0;
3178 ChurnStats {
3179 all_time: touch_count,
3180 last_30d,
3181 last_90d,
3182 recency_weighted,
3183 touch_count,
3184 hotspot_score,
3185 }
3186}
3187
3188fn minimum_churn_confidence(samples: &[ChurnTouchSample]) -> Confidence {
3189 samples
3190 .iter()
3191 .map(|sample| sample.confidence)
3192 .min_by_key(|confidence| confidence_rank(*confidence))
3193 .unwrap_or(Confidence::Low)
3194}
3195
3196fn confidence_rank(confidence: Confidence) -> u8 {
3197 match confidence {
3198 Confidence::Low => 0,
3199 Confidence::Medium => 1,
3200 Confidence::High => 2,
3201 Confidence::Exact => 3,
3202 }
3203}
3204
3205fn churn_entity_kind_key(kind: ChurnEntityKind) -> &'static str {
3206 match kind {
3207 ChurnEntityKind::File => "file",
3208 ChurnEntityKind::Module => "module",
3209 ChurnEntityKind::Symbol => "symbol",
3210 }
3211}
3212
3213fn usize_to_i64(value: usize, field: &str) -> Result<i64> {
3214 i64::try_from(value)
3215 .map_err(|_| OkError::Storage(format!("{field} exceeds SQLite integer range")))
3216}
3217
/// Returns `limit + 1` as an `i64`, saturating at `i64::MAX`.
///
/// NOTE(review): the extra row presumably lets callers such as
/// `collect_limited_json` detect truncation — confirm at the call sites.
fn history_query_limit(limit: usize) -> i64 {
    let with_extra_row = limit.saturating_add(1);
    i64::try_from(with_extra_row).unwrap_or(i64::MAX)
}
3221
3222fn collect_limited_json<T, F>(
3223 rows: rusqlite::MappedRows<'_, F>,
3224 limit: usize,
3225) -> Result<(Vec<T>, bool)>
3226where
3227 F: FnMut(&rusqlite::Row<'_>) -> rusqlite::Result<String>,
3228 T: serde::de::DeserializeOwned,
3229{
3230 let mut values = collect_json(rows)?;
3231 let truncated = values.len() > limit;
3232 values.truncate(limit);
3233 Ok((values, truncated))
3234}
3235
3236fn collect_json<T, F>(rows: rusqlite::MappedRows<'_, F>) -> Result<Vec<T>>
3237where
3238 F: FnMut(&rusqlite::Row<'_>) -> rusqlite::Result<String>,
3239 T: serde::de::DeserializeOwned,
3240{
3241 let mut out = Vec::new();
3242 for row in rows {
3243 let raw = row.map_err(storage_err)?;
3244 out.push(serde_json::from_str(&raw)?);
3245 }
3246 Ok(out)
3247}
3248
3249fn graph_node_by_id(conn: &Connection, id: &str) -> Result<Option<GraphNode>> {
3250 let raw: Option<String> = conn
3251 .query_row(
3252 "SELECT json FROM graph_nodes WHERE id = ?1",
3253 params![id],
3254 |row| row.get(0),
3255 )
3256 .optional()
3257 .map_err(storage_err)?;
3258 raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
3259 .transpose()
3260}
3261
3262fn storage_err(err: rusqlite::Error) -> OkError {
3263 OkError::Storage(err.to_string())
3264}
3265
3266fn occurrence_id(file_id: &str, value: &str, line: Option<u32>, flag: bool) -> String {
3267 use sha2::{Digest, Sha256};
3268 let mut hasher = Sha256::new();
3269 hasher.update(file_id.as_bytes());
3270 hasher.update(b":");
3271 hasher.update(value.as_bytes());
3272 hasher.update(b":");
3273 hasher.update(line.unwrap_or_default().to_string().as_bytes());
3274 hasher.update(b":");
3275 hasher.update(if flag { b"1" } else { b"0" });
3276 format!("{:x}", hasher.finalize())
3277}
3278
3279fn source_type_name(source_type: &EvidenceSourceType) -> &'static str {
3280 match source_type {
3281 EvidenceSourceType::TreeSitter => "tree_sitter",
3282 EvidenceSourceType::Scip => "scip",
3283 EvidenceSourceType::Lsp => "lsp",
3284 EvidenceSourceType::Regex => "regex",
3285 EvidenceSourceType::Lexical => "lexical",
3286 EvidenceSourceType::Semantic => "semantic",
3287 EvidenceSourceType::Runtime => "runtime",
3288 EvidenceSourceType::GitHistory => "git_history",
3289 EvidenceSourceType::StaticAnalysis => "static_analysis",
3290 EvidenceSourceType::ExternalIntegration => "external_integration",
3291 EvidenceSourceType::Heuristic => "heuristic",
3292 }
3293}
3294
3295#[cfg(test)]
3296mod tests {
3297 use super::{SqliteStore, SQLITE_GRAPH_SCHEMA_VERSION};
3298 use chrono::{TimeZone, Utc};
3299 use open_kioku_core::{
3300 AnalysisFact, ChurnEntityKind, CodeChunk, Confidence, EdgeId, Evidence, EvidenceId,
3301 EvidenceSourceType, File, FileId, GitChangeKind, GitCochangeEdge, GitCommitId,
3302 GitCommitRecord, GitFileTouch, GitSymbolTouch, GraphEdge, GraphEdgeType, GraphNode,
3303 GraphNodeType, HistoryRecordId, HistorySignalQuery, HistorySnapshot, IndexManifest,
3304 IndexQuality, Language, LineRange, NodeId, Owner, Repository, RepositoryId,
3305 ReviewerEvidence, ReviewerRole, SimilarChangeQuery, SimilarityEvidenceSource, Symbol,
3306 SymbolId, SymbolKind, SymbolOccurrence, HISTORY_SCHEMA_VERSION,
3307 };
3308 use open_kioku_storage::{
3309 GraphStore, HistoryStore, IndexData, MetadataStore, PartialIndexUpdate,
3310 };
3311 use rusqlite::{params, Connection};
3312 use std::collections::{BTreeMap, BTreeSet};
3313 use std::time::Duration;
3314
3315 fn make_store() -> SqliteStore {
3316 SqliteStore::open(":memory:").expect("in-memory store")
3317 }
3318
3319 fn make_file(id: &str, path: &str) -> File {
3320 File {
3321 id: FileId::new(id),
3322 repository_id: RepositoryId::new("repo"),
3323 path: path.into(),
3324 language: Language::Rust,
3325 size_bytes: 100,
3326 content_hash: format!("hash-{id}"),
3327 is_generated: false,
3328 is_vendor: false,
3329 }
3330 }
3331
3332 fn make_symbol(id: &str, name: &str, file_id: &str) -> Symbol {
3333 Symbol {
3334 id: SymbolId::new(id),
3335 name: name.into(),
3336 qualified_name: format!("module::{name}"),
3337 kind: SymbolKind::Function,
3338 file_id: FileId::new(file_id),
3339 range: Some(LineRange::single(1)),
3340 language: Language::Rust,
3341 confidence: Confidence::High,
3342 provenance: EvidenceSourceType::TreeSitter,
3343 }
3344 }
3345
3346 fn evidence() -> Evidence {
3347 Evidence {
3348 id: EvidenceId::new("ev-1"),
3349 source: "test".into(),
3350 source_type: EvidenceSourceType::Lexical,
3351 file_range: None,
3352 symbol_id: None,
3353 confidence: Confidence::Medium,
3354 message: "test evidence".into(),
3355 indexed_at: Utc::now(),
3356 ..Default::default()
3357 }
3358 }
3359
3360 fn make_manifest() -> IndexManifest {
3361 IndexManifest {
3362 repository: Repository {
3363 id: RepositoryId::new("repo"),
3364 name: "repo".into(),
3365 root: std::path::PathBuf::from("."),
3366 branch: None,
3367 commit: None,
3368 indexed_at: None,
3369 },
3370 file_count: 2,
3371 symbol_count: 2,
3372 chunk_count: 0,
3373 indexed_at: Utc::now(),
3374 schema_version: 1,
3375 index_mode: Default::default(),
3376 phase_reports: Vec::new(),
3377 quality: IndexQuality::default(),
3378 }
3379 }
3380
3381 fn history_snapshot() -> HistorySnapshot {
3382 let older_at = Utc.with_ymd_and_hms(2026, 5, 1, 12, 0, 0).unwrap();
3383 let newer_at = Utc.with_ymd_and_hms(2026, 6, 1, 12, 0, 0).unwrap();
3384 let older_id = GitCommitId::new("older");
3385 let newer_id = GitCommitId::new("newer");
3386 HistorySnapshot {
3387 schema_version: HISTORY_SCHEMA_VERSION,
3388 commits: vec![
3389 GitCommitRecord {
3390 id: older_id.clone(),
3391 parent_ids: Vec::new(),
3392 author: Owner {
3393 name: "Older Author".into(),
3394 email: Some("older@example.com".into()),
3395 },
3396 committer: None,
3397 authored_at: older_at,
3398 committed_at: older_at,
3399 summary: "Introduce library".into(),
3400 message: "Introduce library".into(),
3401 file_count: 2,
3402 },
3403 GitCommitRecord {
3404 id: newer_id.clone(),
3405 parent_ids: vec![older_id.clone()],
3406 author: Owner {
3407 name: "Newer Author".into(),
3408 email: Some("newer@example.com".into()),
3409 },
3410 committer: None,
3411 authored_at: newer_at,
3412 committed_at: newer_at,
3413 summary: "Refine library".into(),
3414 message: "Refine library and tests".into(),
3415 file_count: 3,
3416 },
3417 ],
3418 file_touches: vec![
3419 GitFileTouch {
3420 id: HistoryRecordId::new("file-touch-older"),
3421 commit_id: older_id.clone(),
3422 path: "src/lib.rs".into(),
3423 previous_path: None,
3424 change_kind: GitChangeKind::Added,
3425 additions: Some(20),
3426 deletions: Some(0),
3427 touched_at: older_at,
3428 },
3429 GitFileTouch {
3430 id: HistoryRecordId::new("file-touch-newer"),
3431 commit_id: newer_id.clone(),
3432 path: "src/lib.rs".into(),
3433 previous_path: None,
3434 change_kind: GitChangeKind::Modified,
3435 additions: Some(5),
3436 deletions: Some(2),
3437 touched_at: newer_at,
3438 },
3439 ],
3440 symbol_touches: vec![GitSymbolTouch {
3441 id: HistoryRecordId::new("symbol-touch-newer"),
3442 commit_id: newer_id.clone(),
3443 symbol_id: Some(SymbolId::new("symbol-1")),
3444 qualified_name: "crate::history_for_file".into(),
3445 file_path: "src/lib.rs".into(),
3446 change_kind: GitChangeKind::Modified,
3447 line_ranges: vec![LineRange { start: 4, end: 8 }],
3448 confidence: Confidence::Medium,
3449 uncertainty: vec!["historical coordinates may have shifted".into()],
3450 touched_at: newer_at,
3451 }],
3452 cochange_edges: vec![
3453 GitCochangeEdge {
3454 id: HistoryRecordId::new("cochange-test"),
3455 path: "src/lib.rs".into(),
3456 cochanged_path: "tests/lib_test.rs".into(),
3457 commit_count: 2,
3458 recency_weight: 1.8,
3459 last_changed_at: Some(newer_at),
3460 sample_commits: vec![newer_id.clone(), older_id.clone()],
3461 test_corun: true,
3462 },
3463 GitCochangeEdge {
3464 id: HistoryRecordId::new("cochange-docs"),
3465 path: "src/lib.rs".into(),
3466 cochanged_path: "docs/library.md".into(),
3467 commit_count: 1,
3468 recency_weight: 0.5,
3469 last_changed_at: Some(older_at),
3470 sample_commits: vec![older_id],
3471 test_corun: false,
3472 },
3473 ],
3474 reviewer_evidence: vec![ReviewerEvidence {
3475 id: HistoryRecordId::new("review-newer"),
3476 commit_id: Some(newer_id),
3477 path: None,
3478 reviewer: Owner {
3479 name: "Reviewer".into(),
3480 email: Some("reviewer@example.com".into()),
3481 },
3482 role: ReviewerRole::Reviewer,
3483 observed_at: newer_at,
3484 source: "git-trailer:reviewed-by".into(),
3485 confidence: Confidence::High,
3486 }],
3487 }
3488 }
3489
3490 fn similar_history_snapshot() -> HistorySnapshot {
3491 let intro_at = Utc.with_ymd_and_hms(2026, 6, 1, 12, 0, 0).unwrap();
3492 let target_at = Utc.with_ymd_and_hms(2026, 6, 2, 12, 0, 0).unwrap();
3493 let move_at = Utc.with_ymd_and_hms(2026, 6, 3, 12, 0, 0).unwrap();
3494 let docs_at = Utc.with_ymd_and_hms(2026, 6, 4, 12, 0, 0).unwrap();
3495 let intro_id = GitCommitId::new("auth-intro");
3496 let target_id = GitCommitId::new("auth-expiry-fix");
3497 let move_id = GitCommitId::new("auth-module-move");
3498 let docs_id = GitCommitId::new("token-docs");
3499
3500 HistorySnapshot {
3501 schema_version: HISTORY_SCHEMA_VERSION,
3502 commits: vec![
3503 GitCommitRecord {
3504 id: intro_id.clone(),
3505 parent_ids: Vec::new(),
3506 author: Owner {
3507 name: "Auth Dev".into(),
3508 email: Some("auth@example.com".into()),
3509 },
3510 committer: None,
3511 authored_at: intro_at,
3512 committed_at: intro_at,
3513 summary: "Add login token validation".into(),
3514 message: "Add token validation for login requests".into(),
3515 file_count: 1,
3516 },
3517 GitCommitRecord {
3518 id: target_id.clone(),
3519 parent_ids: vec![intro_id.clone()],
3520 author: Owner {
3521 name: "Auth Dev".into(),
3522 email: Some("auth@example.com".into()),
3523 },
3524 committer: None,
3525 authored_at: target_at,
3526 committed_at: target_at,
3527 summary: "Fix token expiration in login flow".into(),
3528 message:
3529 "Fix login token expiration by updating auth validation and auth tests"
3530 .into(),
3531 file_count: 2,
3532 },
3533 GitCommitRecord {
3534 id: move_id.clone(),
3535 parent_ids: vec![target_id.clone()],
3536 author: Owner {
3537 name: "Platform Dev".into(),
3538 email: Some("platform@example.com".into()),
3539 },
3540 committer: None,
3541 authored_at: move_at,
3542 committed_at: move_at,
3543 summary: "Move auth module".into(),
3544 message: "Move auth module without behavior changes".into(),
3545 file_count: 1,
3546 },
3547 GitCommitRecord {
3548 id: docs_id.clone(),
3549 parent_ids: vec![move_id.clone()],
3550 author: Owner {
3551 name: "Docs Dev".into(),
3552 email: Some("docs@example.com".into()),
3553 },
3554 committer: None,
3555 authored_at: docs_at,
3556 committed_at: docs_at,
3557 summary: "Update token glossary".into(),
3558 message: "Refresh token wording in docs".into(),
3559 file_count: 1,
3560 },
3561 ],
3562 file_touches: vec![
3563 GitFileTouch {
3564 id: HistoryRecordId::new("intro-auth"),
3565 commit_id: intro_id.clone(),
3566 path: "src/auth.rs".into(),
3567 previous_path: None,
3568 change_kind: GitChangeKind::Added,
3569 additions: Some(40),
3570 deletions: Some(0),
3571 touched_at: intro_at,
3572 },
3573 GitFileTouch {
3574 id: HistoryRecordId::new("target-auth"),
3575 commit_id: target_id.clone(),
3576 path: "src/auth.rs".into(),
3577 previous_path: None,
3578 change_kind: GitChangeKind::Modified,
3579 additions: Some(12),
3580 deletions: Some(3),
3581 touched_at: target_at,
3582 },
3583 GitFileTouch {
3584 id: HistoryRecordId::new("target-tests"),
3585 commit_id: target_id.clone(),
3586 path: "tests/auth_flow.rs".into(),
3587 previous_path: None,
3588 change_kind: GitChangeKind::Modified,
3589 additions: Some(18),
3590 deletions: Some(1),
3591 touched_at: target_at,
3592 },
3593 GitFileTouch {
3594 id: HistoryRecordId::new("move-auth"),
3595 commit_id: move_id.clone(),
3596 path: "src/auth.rs".into(),
3597 previous_path: None,
3598 change_kind: GitChangeKind::Modified,
3599 additions: Some(3),
3600 deletions: Some(3),
3601 touched_at: move_at,
3602 },
3603 GitFileTouch {
3604 id: HistoryRecordId::new("docs-token"),
3605 commit_id: docs_id.clone(),
3606 path: "docs/tokens.md".into(),
3607 previous_path: None,
3608 change_kind: GitChangeKind::Modified,
3609 additions: Some(5),
3610 deletions: Some(1),
3611 touched_at: docs_at,
3612 },
3613 ],
3614 symbol_touches: vec![GitSymbolTouch {
3615 id: HistoryRecordId::new("target-symbol"),
3616 commit_id: target_id.clone(),
3617 symbol_id: Some(SymbolId::new("auth-validate-token")),
3618 qualified_name: "crate::auth::validate_token".into(),
3619 file_path: "src/auth.rs".into(),
3620 change_kind: GitChangeKind::Modified,
3621 line_ranges: vec![LineRange { start: 10, end: 18 }],
3622 confidence: Confidence::Medium,
3623 uncertainty: Vec::new(),
3624 touched_at: target_at,
3625 }],
3626 cochange_edges: vec![GitCochangeEdge {
3627 id: HistoryRecordId::new("auth-tests-cochange"),
3628 path: "src/auth.rs".into(),
3629 cochanged_path: "tests/auth_flow.rs".into(),
3630 commit_count: 2,
3631 recency_weight: 1.9,
3632 last_changed_at: Some(target_at),
3633 sample_commits: vec![target_id],
3634 test_corun: true,
3635 }],
3636 reviewer_evidence: Vec::new(),
3637 }
3638 }
3639
3640 #[test]
3641 fn history_migration_upgrades_legacy_database_idempotently() {
3642 let dir = tempfile::tempdir().unwrap();
3643 let path = dir.path().join("index.sqlite");
3644 let legacy = Connection::open(&path).unwrap();
3645 legacy
3646 .execute_batch(
3647 r#"
3648 PRAGMA user_version = 0;
3649 CREATE TABLE analysis_facts (
3650 id TEXT PRIMARY KEY,
3651 file_id TEXT NOT NULL,
3652 source_type TEXT NOT NULL,
3653 target TEXT NOT NULL,
3654 json TEXT NOT NULL
3655 );
3656 INSERT INTO analysis_facts(id, file_id, source_type, target, json)
3657 VALUES('legacy-git', 'f1', 'git_history', 'tests/lib_test.rs', '{}');
3658 "#,
3659 )
3660 .unwrap();
3661 drop(legacy);
3662
3663 let store = SqliteStore::open(&path).unwrap();
3664 store.initialize().unwrap();
3665
3666 let conn = store.connection.lock().unwrap();
3667 let version: i64 = conn
3668 .pragma_query_value(None, "user_version", |row| row.get(0))
3669 .unwrap();
3670 assert_eq!(version, SQLITE_GRAPH_SCHEMA_VERSION);
3671 let history_table_count: i64 = conn
3672 .query_row(
3673 "SELECT COUNT(*) FROM sqlite_master
3674 WHERE type = 'table'
3675 AND name IN (
3676 'git_commits',
3677 'git_file_touches',
3678 'git_symbol_touches',
3679 'git_cochange_edges',
3680 'git_review_events',
3681 'history_hotspots'
3682 )",
3683 [],
3684 |row| row.get(0),
3685 )
3686 .unwrap();
3687 assert_eq!(history_table_count, 6);
3688 let legacy_fact_count: i64 = conn
3689 .query_row("SELECT COUNT(*) FROM analysis_facts", [], |row| row.get(0))
3690 .unwrap();
3691 assert_eq!(legacy_fact_count, 1);
3692 }
3693
3694 #[test]
3695 fn newer_sqlite_schema_is_rejected_without_mutation() {
3696 let dir = tempfile::tempdir().unwrap();
3697 let path = dir.path().join("future.sqlite");
3698 let future = Connection::open(&path).unwrap();
3699 future
3700 .execute_batch(
3701 r#"
3702 PRAGMA user_version = 3;
3703 CREATE TABLE future_history_marker (id INTEGER PRIMARY KEY);
3704 "#,
3705 )
3706 .unwrap();
3707 drop(future);
3708
3709 let error = match SqliteStore::open(&path) {
3710 Ok(_) => panic!("newer schema should be rejected"),
3711 Err(error) => error.to_string(),
3712 };
3713 assert!(error.contains("newer than supported version 2"));
3714
3715 let conn = Connection::open(&path).unwrap();
3716 let current_table_count: i64 = conn
3717 .query_row(
3718 "SELECT COUNT(*) FROM sqlite_master WHERE type = 'table' AND name = 'manifests'",
3719 [],
3720 |row| row.get(0),
3721 )
3722 .unwrap();
3723 assert_eq!(current_table_count, 0);
3724 let future_marker_count: i64 = conn
3725 .query_row(
3726 "SELECT COUNT(*) FROM sqlite_master WHERE type = 'table' AND name = 'future_history_marker'",
3727 [],
3728 |row| row.get(0),
3729 )
3730 .unwrap();
3731 assert_eq!(future_marker_count, 1);
3732 }
3733
3734 #[test]
3735 fn history_snapshot_queries_return_typed_evidence() {
3736 let store = make_store();
3737 store.put_history_snapshot(&history_snapshot()).unwrap();
3738
3739 let recent = store.recent_commits(10).unwrap();
3740 assert_eq!(recent.len(), 2);
3741 assert_eq!(recent[0].id.0, "newer");
3742
3743 let neighbors = store
3744 .cochange_neighbors(std::path::Path::new("src/lib.rs"), 10)
3745 .unwrap();
3746 assert_eq!(neighbors.len(), 2);
3747 assert_eq!(
3748 neighbors[0].cochanged_path,
3749 std::path::Path::new("tests/lib_test.rs")
3750 );
3751
3752 let summary = store
3753 .history_for_file(std::path::Path::new("src/lib.rs"), 10)
3754 .unwrap();
3755 assert_eq!(summary.recent_commits.len(), 2);
3756 assert_eq!(summary.file_touches.len(), 2);
3757 assert_eq!(summary.symbol_touches.len(), 1);
3758 assert_eq!(summary.cochange_neighbors.len(), 2);
3759 assert_eq!(summary.reviewer_evidence.len(), 1);
3760 assert!(!summary.truncated);
3761 assert!(summary.uncertainty.is_empty());
3762
3763 let truncated = store
3764 .history_for_file(std::path::Path::new("src/lib.rs"), 1)
3765 .unwrap();
3766 assert!(truncated.truncated);
3767 assert!(truncated
3768 .uncertainty
3769 .iter()
3770 .any(|note| note.contains("truncated")));
3771 }
3772
3773 #[test]
3774 fn similar_changes_rank_and_explain_multi_signal_history() {
3775 let store = make_store();
3776 store
3777 .put_history_snapshot(&similar_history_snapshot())
3778 .unwrap();
3779
3780 let report = store
3781 .similar_changes(
3782 &SimilarChangeQuery {
3783 task: Some("fix token expiration".into()),
3784 paths: vec!["src/auth.rs".into()],
3785 symbols: vec!["validate_token".into()],
3786 },
3787 5,
3788 )
3789 .unwrap();
3790
3791 assert!(!report.truncated);
3792 assert_eq!(report.hits[0].change.commit.id.0, "auth-expiry-fix");
3793 assert!(report.hits[0].score > 0.90, "{:#?}", report.hits[0]);
3794 assert_eq!(report.hits[0].confidence, Confidence::High);
3795 let source_types = report.hits[0]
3796 .evidence
3797 .iter()
3798 .map(|evidence| evidence.source_type)
3799 .collect::<BTreeSet<_>>();
3800 assert!(source_types.contains(&SimilarityEvidenceSource::TaskText));
3801 assert!(source_types.contains(&SimilarityEvidenceSource::CommitMetadata));
3802 assert!(source_types.contains(&SimilarityEvidenceSource::Path));
3803 assert!(source_types.contains(&SimilarityEvidenceSource::Symbol));
3804 assert!(source_types.contains(&SimilarityEvidenceSource::Cochange));
3805 assert!(source_types.contains(&SimilarityEvidenceSource::Churn));
3806
3807 let weak = report
3808 .hits
3809 .iter()
3810 .find(|hit| hit.change.commit.id.0 == "token-docs")
3811 .expect("weak task-text hit should still be visible");
3812 assert_eq!(weak.confidence, Confidence::Low);
3813 assert!(weak
3814 .uncertainty
3815 .iter()
3816 .any(|note| note.contains("low-confidence")));
3817 }
3818
3819 #[test]
3820 fn history_score_components_are_bounded_and_named() {
3821 let store = make_store();
3822 store.put_history_snapshot(&history_snapshot()).unwrap();
3823
3824 let summary = store
3825 .history_score_components(
3826 &HistorySignalQuery {
3827 path: "src/lib.rs".into(),
3828 task: Some("update lib history behavior".into()),
3829 symbols: vec!["crate::history_for_file".into()],
3830 },
3831 10,
3832 )
3833 .unwrap();
3834
3835 let signals = summary
3836 .components
3837 .iter()
3838 .map(|component| component.signal.as_str())
3839 .collect::<BTreeSet<_>>();
3840 assert!(signals.contains("history_churn"), "{summary:#?}");
3841 assert!(signals.contains("similar_change_overlap"), "{summary:#?}");
3842 assert!(signals.contains("reviewer_affinity"), "{summary:#?}");
3843 assert!(summary
3844 .components
3845 .iter()
3846 .all(|component| component.contribution <= 0.18));
3847 assert!(!summary.evidence_refs.is_empty());
3848 assert!(summary.reasons.iter().any(|reason| {
3849 reason.contains("history churn") || reason.contains("similar change")
3850 }));
3851 }
3852
3853 #[test]
3854 fn similar_changes_limit_is_deterministic_and_reports_truncation() {
3855 let store = make_store();
3856 store
3857 .put_history_snapshot(&similar_history_snapshot())
3858 .unwrap();
3859
3860 let report = store
3861 .similar_changes(
3862 &SimilarChangeQuery {
3863 task: Some("fix token expiration".into()),
3864 paths: vec!["src/auth.rs".into()],
3865 symbols: vec!["validate_token".into()],
3866 },
3867 1,
3868 )
3869 .unwrap();
3870
3871 assert!(report.truncated);
3872 assert_eq!(report.hits.len(), 1);
3873 assert_eq!(report.hits[0].change.commit.id.0, "auth-expiry-fix");
3874 assert!(report
3875 .uncertainty
3876 .iter()
3877 .any(|note| note.contains("truncated to 1")));
3878 }
3879
3880 #[test]
3881 fn churn_summaries_are_materialized_with_deterministic_windows() {
3882 let store = make_store();
3883 store.put_history_snapshot(&history_snapshot()).unwrap();
3884
3885 let file = store
3886 .churn_for_file(std::path::Path::new("src/lib.rs"))
3887 .unwrap();
3888 assert_eq!(file.entity_kind, ChurnEntityKind::File);
3889 assert_eq!(file.stats.all_time, 2);
3890 assert_eq!(file.stats.last_30d, 1);
3891 assert_eq!(file.stats.last_90d, 2);
3892 assert_eq!(file.stats.touch_count, 2);
3893 assert!(file.stats.recency_weighted > 1.4);
3894 assert!(file.stats.hotspot_score > file.stats.recency_weighted);
3895 assert_eq!(
3896 file.generated_at,
3897 Utc.with_ymd_and_hms(2026, 6, 1, 12, 0, 0).unwrap()
3898 );
3899 assert_eq!(file.confidence, Confidence::Exact);
3900
3901 let module = store.churn_for_module(std::path::Path::new("src")).unwrap();
3902 assert_eq!(module.entity_kind, ChurnEntityKind::Module);
3903 assert_eq!(module.stats.all_time, 2);
3904 assert_eq!(module.stats.last_30d, 1);
3905 assert_eq!(module.confidence, Confidence::Medium);
3906 assert!(module
3907 .uncertainty
3908 .iter()
3909 .any(|note| note.contains("aggregated from persisted file touches")));
3910
3911 let symbol_id = SymbolId::new("symbol-1");
3912 let symbol = store.churn_for_symbol(&symbol_id).unwrap();
3913 assert_eq!(symbol.entity_kind, ChurnEntityKind::Symbol);
3914 assert_eq!(symbol.stats.all_time, 1);
3915 assert_eq!(symbol.stats.last_30d, 1);
3916 assert_eq!(symbol.stats.last_90d, 1);
3917 assert_eq!(symbol.confidence, Confidence::Medium);
3918 assert_eq!(
3919 symbol.qualified_name.as_deref(),
3920 Some("crate::history_for_file")
3921 );
3922 assert!(symbol
3923 .uncertainty
3924 .iter()
3925 .any(|note| note.contains("historical coordinates may have shifted")));
3926
3927 let missing = store
3928 .churn_for_symbol(&SymbolId::new("missing-symbol"))
3929 .unwrap();
3930 assert_eq!(missing.stats.touch_count, 0);
3931 assert_eq!(missing.confidence, Confidence::Low);
3932 assert!(missing
3933 .uncertainty
3934 .iter()
3935 .any(|note| note.contains("no persisted symbol-level churn")));
3936 }
3937
3938 #[test]
3939 fn hotspot_ordering_and_lookup_use_persisted_summary_table() {
3940 let store = make_store();
3941 let mut snapshot = history_snapshot();
3942 snapshot.file_touches.push(GitFileTouch {
3943 id: HistoryRecordId::new("file-touch-docs"),
3944 commit_id: GitCommitId::new("older"),
3945 path: "docs/readme.md".into(),
3946 previous_path: None,
3947 change_kind: GitChangeKind::Modified,
3948 additions: Some(1),
3949 deletions: Some(0),
3950 touched_at: Utc.with_ymd_and_hms(2026, 5, 1, 12, 0, 0).unwrap(),
3951 });
3952 store.put_history_snapshot(&snapshot).unwrap();
3953
3954 let conn = store.connection.lock().unwrap();
3955 let mut stmt = conn
3956 .prepare(
3957 "SELECT entity_key FROM history_hotspots
3958 WHERE entity_kind = 'file'
3959 ORDER BY hotspot_score DESC, touch_count DESC, entity_key
3960 LIMIT 2",
3961 )
3962 .unwrap();
3963 let rows = stmt
3964 .query_map([], |row| row.get::<_, String>(0))
3965 .unwrap()
3966 .collect::<std::result::Result<Vec<_>, _>>()
3967 .unwrap();
3968 drop(stmt);
3969 drop(conn);
3970 assert_eq!(rows, vec!["src/lib.rs", "docs/readme.md"]);
3971
3972 let mut elapsed = Vec::new();
3973 for _ in 0..40 {
3974 let started = std::time::Instant::now();
3975 let summary = store
3976 .churn_for_file(std::path::Path::new("src/lib.rs"))
3977 .unwrap();
3978 assert_eq!(summary.stats.touch_count, 2);
3979 elapsed.push(started.elapsed());
3980 }
3981 elapsed.sort();
3982 let p95 = elapsed[(elapsed.len() * 95 / 100).min(elapsed.len() - 1)];
3983 assert!(
3984 p95 < Duration::from_millis(200),
3985 "persisted churn lookup p95 was {p95:?}"
3986 );
3987 }
3988
3989 #[test]
3990 fn churn_summaries_follow_rename_aliases_without_module_double_counting() {
3991 let store = make_store();
3992 let mut snapshot = history_snapshot();
3993 snapshot.file_touches[0].path = "src/old.rs".into();
3994 snapshot.file_touches[1].previous_path = Some("src/old.rs".into());
3995 snapshot.file_touches[1].change_kind = GitChangeKind::Renamed;
3996 store.put_history_snapshot(&snapshot).unwrap();
3997
3998 let current = store
3999 .churn_for_file(std::path::Path::new("src/lib.rs"))
4000 .unwrap();
4001 let historical = store
4002 .churn_for_file(std::path::Path::new("src/old.rs"))
4003 .unwrap();
4004 assert_eq!(current.stats.all_time, 2);
4005 assert_eq!(historical.stats.all_time, 2);
4006 assert_eq!(current.stats.last_30d, 1);
4007 assert_eq!(historical.stats.last_30d, 1);
4008
4009 let module = store.churn_for_module(std::path::Path::new("src")).unwrap();
4010 assert_eq!(module.stats.all_time, 2);
4011 assert_eq!(module.stats.last_90d, 2);
4012
4013 let root = store.churn_for_module(std::path::Path::new(".")).unwrap();
4014 assert_eq!(root.key, "__root__");
4015 assert_eq!(root.stats.all_time, 2);
4016 }
4017
4018 #[test]
4019 fn provenance_queries_return_first_last_and_explicit_symbol_uncertainty() {
4020 let store = make_store();
4021 let file = make_file("file-1", "src/lib.rs");
4022 let symbol = make_symbol("symbol-1", "history_for_file", "file-1");
4023 let mut unmapped_symbol = make_symbol("symbol-2", "unmapped", "file-1");
4024 unmapped_symbol.range = None;
4025 let manifest = make_manifest();
4026 store
4027 .replace_index(IndexData {
4028 manifest: &manifest,
4029 files: std::slice::from_ref(&file),
4030 symbols: &[symbol.clone(), unmapped_symbol.clone()],
4031 chunks: &[],
4032 tests: &[],
4033 imports: &[],
4034 occurrences: &[],
4035 analysis_facts: &[],
4036 })
4037 .unwrap();
4038 store.put_history_snapshot(&history_snapshot()).unwrap();
4039
4040 let file_provenance = store
4041 .provenance_for_path(std::path::Path::new("src/lib.rs"), 10)
4042 .unwrap();
4043 assert_eq!(
4044 file_provenance
4045 .first_seen
4046 .as_ref()
4047 .map(|touch| touch.commit.id.0.as_str()),
4048 Some("older")
4049 );
4050 assert_eq!(
4051 file_provenance
4052 .last_touched
4053 .as_ref()
4054 .map(|touch| touch.commit.id.0.as_str()),
4055 Some("newer")
4056 );
4057 assert_eq!(file_provenance.recent_touches.len(), 2);
4058 assert_eq!(file_provenance.confidence, Confidence::Exact);
4059
4060 let symbol_provenance = store.provenance_for_symbol(&symbol.id, 10).unwrap();
4061 assert_eq!(symbol_provenance.recent_touches.len(), 1);
4062 assert_eq!(symbol_provenance.confidence, Confidence::Medium);
4063 assert_eq!(
4064 symbol_provenance.recent_touches[0].commit.author.name,
4065 "Newer Author"
4066 );
4067 assert_eq!(
4068 symbol_provenance.recent_touches[0].line_ranges,
4069 vec![LineRange { start: 4, end: 8 }]
4070 );
4071 assert!(symbol_provenance
4072 .uncertainty
4073 .iter()
4074 .any(|note| note.contains("earliest line-mapped touch")));
4075
4076 let unmapped = store
4077 .provenance_for_symbol(&unmapped_symbol.id, 10)
4078 .unwrap();
4079 assert!(unmapped.first_seen.is_none());
4080 assert!(unmapped.last_touched.is_none());
4081 assert!(unmapped.recent_touches.is_empty());
4082 assert_eq!(unmapped.confidence, Confidence::Low);
4083 assert!(unmapped
4084 .uncertainty
4085 .iter()
4086 .any(|note| note.contains("no persisted line-level commit mapping")));
4087 assert!(unmapped
4088 .uncertainty
4089 .iter()
4090 .any(|note| note.contains("has no line range")));
4091 }
4092
4093 #[test]
4094 fn path_provenance_follows_rename_aliases_in_both_directions() {
4095 let store = make_store();
4096 let mut snapshot = history_snapshot();
4097 snapshot.file_touches[0].path = "src/old.rs".into();
4098 snapshot.file_touches[1].previous_path = Some("src/old.rs".into());
4099 snapshot.file_touches[1].change_kind = GitChangeKind::Renamed;
4100 store.put_history_snapshot(&snapshot).unwrap();
4101
4102 let current = store
4103 .provenance_for_path(std::path::Path::new("src/lib.rs"), 10)
4104 .unwrap();
4105 let historical = store
4106 .provenance_for_path(std::path::Path::new("src/old.rs"), 10)
4107 .unwrap();
4108
4109 assert_eq!(current.recent_touches.len(), 2);
4110 assert_eq!(historical.recent_touches.len(), 2);
4111 assert_eq!(
4112 current
4113 .first_seen
4114 .as_ref()
4115 .map(|touch| touch.path.as_path()),
4116 Some(std::path::Path::new("src/old.rs"))
4117 );
4118 }
4119
4120 #[test]
4121 fn invalid_snapshot_does_not_replace_existing_history() {
4122 let store = make_store();
4123 let snapshot = history_snapshot();
4124 store.put_history_snapshot(&snapshot).unwrap();
4125
4126 let mut invalid = snapshot;
4127 invalid.file_touches[0].commit_id = GitCommitId::new("missing");
4128 let error = store
4129 .put_history_snapshot(&invalid)
4130 .unwrap_err()
4131 .to_string();
4132 assert!(error.contains("references missing commit `missing`"));
4133
4134 let recent = store.recent_commits(10).unwrap();
4135 assert_eq!(recent.len(), 2);
4136 assert_eq!(recent[0].id.0, "newer");
4137
4138 store
4139 .put_history_snapshot(&HistorySnapshot::empty())
4140 .unwrap();
4141 assert!(store.recent_commits(10).unwrap().is_empty());
4142 }
4143
4144 #[test]
4145 fn replace_index_and_list_files() {
4146 let store = make_store();
4147 let file1 = make_file("f1", "src/main.rs");
4148 let file2 = make_file("f2", "src/lib.rs");
4149 let sym1 = make_symbol("s1", "main_fn", "f1");
4150
4151 let manifest = make_manifest();
4152 let files = vec![file1.clone(), file2.clone()];
4153 let symbols = vec![sym1.clone()];
4154
4155 let data = IndexData {
4156 manifest: &manifest,
4157 files: &files,
4158 symbols: &symbols,
4159 occurrences: &[],
4160 chunks: &[],
4161 imports: &[],
4162 tests: &[],
4163 analysis_facts: &[],
4164 };
4165 store.replace_index(data).unwrap();
4166
4167 let files_list = store.list_files(100, 0).unwrap();
4168 assert_eq!(files_list.len(), 2);
4169
4170 let by_path = store
4171 .get_file_by_path(&std::path::PathBuf::from("src/main.rs"))
4172 .unwrap();
4173 assert!(by_path.is_some());
4174 assert_eq!(by_path.unwrap().id, file1.id);
4175 }
4176
    /// End-to-end check of `replace_files_index` (a partial index update).
    ///
    /// Setup: two files (`f1` = `src/main.rs`, `f2` = `src/lib.rs`), one symbol and one
    /// graph node per file, an edge between the two symbol nodes, and a second edge
    /// between two file-less "external" nodes whose evidence source is `src/main.rs`.
    ///
    /// The partial update then deletes `f1` and replaces the contents of `f2`
    /// (new hash, new symbol `s2b`, new chunk `c2`, new node `symbol:s2b`) while
    /// supplying no graph edges.
    ///
    /// Afterwards the test expects: `f1` and everything attached to it gone, `f2`
    /// carrying the new data, no rows left in `graph_edges`, and only the new symbol
    /// node present for `f2`.
    #[test]
    fn partial_replace_updates_changed_files_and_cleans_deleted_graph_edges() {
        let store = make_store();
        let manifest = make_manifest();
        let file1 = make_file("f1", "src/main.rs");
        let file2 = make_file("f2", "src/lib.rs");
        let sym1 = make_symbol("s1", "main_fn", "f1");
        let sym2 = make_symbol("s2", "lib_fn", "f2");
        // Chunk owned by file1; it should disappear when file1 is deleted below.
        let old_chunk = CodeChunk {
            id: "c1".into(),
            file_id: file1.id.clone(),
            range: LineRange { start: 1, end: 1 },
            language: Language::Rust,
            text: "fn main_fn() {}".into(),
            symbol_id: Some(sym1.id.clone()),
        };
        // Seed the full index with both files, both symbols, one chunk and one occurrence.
        store
            .replace_index(IndexData {
                manifest: &manifest,
                files: &[file1.clone(), file2.clone()],
                symbols: &[sym1.clone(), sym2.clone()],
                chunks: std::slice::from_ref(&old_chunk),
                tests: &[],
                imports: &[],
                occurrences: &[SymbolOccurrence {
                    symbol_id: sym1.id.clone(),
                    file_id: file1.id.clone(),
                    range: Some(LineRange::single(1)),
                    is_definition: true,
                    confidence: Confidence::Exact,
                    provenance: EvidenceSourceType::StaticAnalysis,
                }],
                analysis_facts: &[],
            })
            .unwrap();
        // Graph nodes tied to the indexed files/symbols.
        let node1 = GraphNode {
            id: NodeId::new("symbol:s1"),
            node_type: GraphNodeType::Function,
            label: "main_fn".into(),
            file_id: Some(file1.id.clone()),
            symbol_id: Some(sym1.id.clone()),
            ..Default::default()
        };
        let node2 = GraphNode {
            id: NodeId::new("symbol:s2"),
            node_type: GraphNodeType::Function,
            label: "lib_fn".into(),
            file_id: Some(file2.id.clone()),
            symbol_id: Some(sym2.id.clone()),
            ..Default::default()
        };
        // Edge whose endpoints both belong to files touched by the partial update.
        let edge = GraphEdge {
            id: EdgeId::new("edge:s1-s2"),
            from: node1.id.clone(),
            to: node2.id.clone(),
            edge_type: GraphEdgeType::References,
            evidence: evidence(),
            ..Default::default()
        };
        // Two nodes with no file or symbol association.
        let node3 = GraphNode {
            id: NodeId::new("external:a"),
            node_type: GraphNodeType::Module,
            label: "external a".into(),
            ..Default::default()
        };
        let node4 = GraphNode {
            id: NodeId::new("external:b"),
            node_type: GraphNodeType::Module,
            label: "external b".into(),
            ..Default::default()
        };
        // This edge links only the file-less nodes; its sole connection to file1 is the
        // evidence source string, which equals file1's path.
        let mut source_evidence = evidence();
        source_evidence.source = "src/main.rs".into();
        let source_edge = GraphEdge {
            id: EdgeId::new("edge:source-file"),
            from: node3.id.clone(),
            to: node4.id.clone(),
            edge_type: GraphEdgeType::RelatedToTicket,
            evidence: source_evidence,
            ..Default::default()
        };
        store
            .replace_graph(
                &[node1, node2.clone(), node3.clone(), node4.clone()],
                &[edge.clone(), source_edge],
            )
            .unwrap();

        // Replacement data for file2: same file id, new hash, new symbol/chunk/node ids.
        let mut updated_file2 = file2.clone();
        updated_file2.content_hash = "new-hash".into();
        let updated_sym2 = make_symbol("s2b", "lib_fn_new", "f2");
        let updated_chunk = CodeChunk {
            id: "c2".into(),
            file_id: updated_file2.id.clone(),
            range: LineRange { start: 2, end: 2 },
            language: Language::Rust,
            text: "fn lib_fn_new() {}".into(),
            symbol_id: Some(updated_sym2.id.clone()),
        };
        let updated_node2 = GraphNode {
            id: NodeId::new("symbol:s2b"),
            node_type: GraphNodeType::Function,
            label: "lib_fn_new".into(),
            file_id: Some(updated_file2.id.clone()),
            symbol_id: Some(updated_sym2.id.clone()),
            ..Default::default()
        };
        // The partial update under test: file2 changed, file1 deleted, no new edges.
        store
            .replace_files_index(PartialIndexUpdate {
                manifest: &manifest,
                changed_files: std::slice::from_ref(&updated_file2),
                deleted_file_ids: std::slice::from_ref(&file1.id),
                symbols: std::slice::from_ref(&updated_sym2),
                chunks: std::slice::from_ref(&updated_chunk),
                tests: &[],
                imports: &[],
                occurrences: &[],
                analysis_facts: &[],
                graph_nodes: std::slice::from_ref(&updated_node2),
                graph_edges: &[],
            })
            .unwrap();

        // Deleted file is gone; changed file carries the new hash.
        assert!(store
            .get_file_by_path(std::path::Path::new("src/main.rs"))
            .unwrap()
            .is_none());
        assert_eq!(
            store
                .get_file_by_path(std::path::Path::new("src/lib.rs"))
                .unwrap()
                .unwrap()
                .content_hash,
            "new-hash"
        );
        // Symbols and chunks follow their files.
        assert!(store.symbol_by_id(&sym1.id).unwrap().is_none());
        assert!(store.symbol_by_id(&updated_sym2.id).unwrap().is_some());
        assert!(store.chunks_for_file(&file1.id).unwrap().is_empty());
        assert_eq!(store.chunks_for_file(&file2.id).unwrap()[0].id, "c2");
        // Both seeded edges must be gone. NOTE(review): the `edge:source-file` edge is
        // presumably removed because its evidence source names the deleted file's path —
        // confirm against `replace_files_index`.
        let edge_count: i64 = store
            .connection
            .lock()
            .unwrap()
            .query_row("SELECT COUNT(*) FROM graph_edges", [], |row| row.get(0))
            .unwrap();
        assert_eq!(edge_count, 0);
        // The deleted file's node is removed and the replacement node is stored.
        assert!(store.node_by_id("symbol:s1").unwrap().is_none());
        assert!(store.node_by_id("symbol:s2b").unwrap().is_some());
    }
4326
4327 #[test]
4328 fn partial_replace_rolls_back_on_insert_failure() {
4329 let store = make_store();
4330 let manifest = make_manifest();
4331 let file = make_file("f1", "src/lib.rs");
4332 store
4333 .replace_index(IndexData {
4334 manifest: &manifest,
4335 files: std::slice::from_ref(&file),
4336 symbols: &[],
4337 chunks: &[],
4338 tests: &[],
4339 imports: &[],
4340 occurrences: &[],
4341 analysis_facts: &[],
4342 })
4343 .unwrap();
4344
4345 let duplicate_a = make_file("f2", "src/dup.rs");
4346 let mut duplicate_b = make_file("f3", "src/dup.rs");
4347 duplicate_b.content_hash = "other".into();
4348 let error = store
4349 .replace_files_index(PartialIndexUpdate {
4350 manifest: &manifest,
4351 changed_files: &[duplicate_a, duplicate_b],
4352 deleted_file_ids: std::slice::from_ref(&file.id),
4353 symbols: &[],
4354 chunks: &[],
4355 tests: &[],
4356 imports: &[],
4357 occurrences: &[],
4358 analysis_facts: &[],
4359 graph_nodes: &[],
4360 graph_edges: &[],
4361 })
4362 .unwrap_err()
4363 .to_string();
4364 assert!(error.contains("UNIQUE") || error.contains("constraint"));
4365 assert!(store
4366 .get_file_by_path(std::path::Path::new("src/lib.rs"))
4367 .unwrap()
4368 .is_some());
4369 assert!(store
4370 .get_file_by_path(std::path::Path::new("src/dup.rs"))
4371 .unwrap()
4372 .is_none());
4373 }
4374
4375 #[test]
4376 fn replace_index_persists_analysis_facts() {
4377 let store = make_store();
4378 let file = make_file("f1", "src/handler.rs");
4379 let manifest = make_manifest();
4380 let runtime_fact = AnalysisFact {
4381 id: "runtime-1".into(),
4382 file_id: file.id.clone(),
4383 symbol_id: None,
4384 target: "GET /api/orders".into(),
4385 target_kind: GraphNodeType::Endpoint,
4386 edge_type: GraphEdgeType::ExposesEndpoint,
4387 range: Some(LineRange::single(12)),
4388 confidence: Confidence::High,
4389 source: "open-kioku-runtime:.ok/runtime/spans.jsonl".into(),
4390 source_type: EvidenceSourceType::Runtime,
4391 message: "runtime endpoint observed in local trace artifact".into(),
4392 };
4393 let static_fact = AnalysisFact {
4394 id: "static-1".into(),
4395 file_id: file.id.clone(),
4396 symbol_id: None,
4397 target: "orders".into(),
4398 target_kind: GraphNodeType::DatabaseTable,
4399 edge_type: GraphEdgeType::ReadsTable,
4400 range: None,
4401 confidence: Confidence::Medium,
4402 source: "open-kioku-static".into(),
4403 source_type: EvidenceSourceType::StaticAnalysis,
4404 message: "static fact".into(),
4405 };
4406 let git_fact = AnalysisFact {
4407 id: "git-1".into(),
4408 file_id: file.id.clone(),
4409 symbol_id: None,
4410 target: "tests/handler_test.rs".into(),
4411 target_kind: GraphNodeType::Test,
4412 edge_type: GraphEdgeType::ChangedBy,
4413 range: None,
4414 confidence: Confidence::High,
4415 source: "git-history:abc123".into(),
4416 source_type: EvidenceSourceType::GitHistory,
4417 message: "git co-change observed in 1 commit(s), recency weight 1.00".into(),
4418 };
4419
4420 store
4421 .replace_index(IndexData {
4422 manifest: &manifest,
4423 files: &[file],
4424 symbols: &[],
4425 occurrences: &[],
4426 chunks: &[],
4427 imports: &[],
4428 tests: &[],
4429 analysis_facts: &[runtime_fact.clone(), static_fact, git_fact.clone()],
4430 })
4431 .unwrap();
4432
4433 let runtime = store
4434 .analysis_facts(Some(EvidenceSourceType::Runtime), 10)
4435 .unwrap();
4436 assert_eq!(runtime.len(), 1);
4437 assert_eq!(runtime[0].id, runtime_fact.id);
4438 assert_eq!(runtime[0].target, runtime_fact.target);
4439 let git = store
4440 .analysis_facts(Some(EvidenceSourceType::GitHistory), 10)
4441 .unwrap();
4442 assert_eq!(git.len(), 1);
4443 assert_eq!(git[0].id, git_fact.id);
4444 assert_eq!(git[0].target, git_fact.target);
4445 let all = store.analysis_facts(None, 10).unwrap();
4446 assert_eq!(all.len(), 3);
4447 }
4448
4449 #[test]
4450 fn replace_index_preserves_typed_and_legacy_history() {
4451 let store = make_store();
4452 store.put_history_snapshot(&history_snapshot()).unwrap();
4453
4454 let file = make_file("f1", "src/lib.rs");
4455 let manifest = make_manifest();
4456 let git_fact = AnalysisFact {
4457 id: "legacy-git-1".into(),
4458 file_id: file.id.clone(),
4459 symbol_id: None,
4460 target: "tests/lib_test.rs".into(),
4461 target_kind: GraphNodeType::Test,
4462 edge_type: GraphEdgeType::ChangedBy,
4463 range: None,
4464 confidence: Confidence::High,
4465 source: "git-history:newer".into(),
4466 source_type: EvidenceSourceType::GitHistory,
4467 message: "legacy co-change compatibility fact".into(),
4468 };
4469
4470 for _ in 0..2 {
4471 store
4472 .replace_index(IndexData {
4473 manifest: &manifest,
4474 files: std::slice::from_ref(&file),
4475 symbols: &[],
4476 occurrences: &[],
4477 chunks: &[],
4478 imports: &[],
4479 tests: &[],
4480 analysis_facts: std::slice::from_ref(&git_fact),
4481 })
4482 .unwrap();
4483 }
4484
4485 assert_eq!(store.recent_commits(10).unwrap().len(), 2);
4486 let summary = store
4487 .history_for_file(std::path::Path::new("src/lib.rs"), 10)
4488 .unwrap();
4489 assert_eq!(summary.file_touches.len(), 2);
4490 let legacy = store
4491 .analysis_facts(Some(EvidenceSourceType::GitHistory), 10)
4492 .unwrap();
4493 assert_eq!(legacy.len(), 1);
4494 assert_eq!(legacy[0].id, git_fact.id);
4495 }
4496
4497 #[test]
4498 fn list_symbols_with_filter() {
4499 let store = make_store();
4500 let file = make_file("f1", "src/lib.rs");
4501 let sym_a = make_symbol("s1", "alpha_handler", "f1");
4502 let sym_b = make_symbol("s2", "beta_worker", "f1");
4503 let manifest = make_manifest();
4504 let files = vec![file];
4505 let symbols = vec![sym_a, sym_b];
4506 let data = IndexData {
4507 manifest: &manifest,
4508 files: &files,
4509 symbols: &symbols,
4510 occurrences: &[],
4511 chunks: &[],
4512 imports: &[],
4513 tests: &[],
4514 analysis_facts: &[],
4515 };
4516 store.replace_index(data).unwrap();
4517
4518 let all = store.list_symbols(None, 100, 0).unwrap();
4519 assert_eq!(all.len(), 2);
4520
4521 let filtered = store.list_symbols(Some("alpha"), 10, 0).unwrap();
4522 assert_eq!(filtered.len(), 1);
4523 assert_eq!(filtered[0].name, "alpha_handler");
4524 }
4525
4526 #[test]
4527 fn replace_graph_and_neighbors() {
4528 let store = make_store();
4529 let file = make_file("f1", "src/lib.rs");
4531 let manifest = make_manifest();
4532 let files = vec![file];
4533 let data = IndexData {
4534 manifest: &manifest,
4535 files: &files,
4536 symbols: &[],
4537 occurrences: &[],
4538 chunks: &[],
4539 imports: &[],
4540 tests: &[],
4541 analysis_facts: &[],
4542 };
4543 store.replace_index(data).unwrap();
4544
4545 let node_a = GraphNode {
4546 id: NodeId::new("file:src/lib.rs"),
4547 node_type: GraphNodeType::File,
4548 label: "src/lib.rs".into(),
4549 file_id: Some(FileId::new("f1")),
4550 symbol_id: None,
4551 ..Default::default()
4552 };
4553 let node_b = GraphNode {
4554 id: NodeId::new("symbol:s1"),
4555 node_type: GraphNodeType::Function,
4556 label: "worker".into(),
4557 file_id: Some(FileId::new("f1")),
4558 symbol_id: Some(SymbolId::new("s1")),
4559 ..Default::default()
4560 };
4561 let edge = GraphEdge {
4562 id: EdgeId::new("e1"),
4563 from: node_a.id.clone(),
4564 to: node_b.id.clone(),
4565 edge_type: GraphEdgeType::Defines,
4566 evidence: evidence(),
4567 ..Default::default()
4568 };
4569
4570 store
4571 .replace_graph(
4572 &[node_a.clone(), node_b.clone()],
4573 std::slice::from_ref(&edge),
4574 )
4575 .unwrap();
4576
4577 let (nodes, edges) = store.neighbors("file:src/lib.rs", 10).unwrap();
4578 assert_eq!(edges.len(), 1);
4579 assert_eq!(edges[0].id.0, "e1");
4580 assert!(nodes.iter().any(|n| n.id == node_a.id));
4581 }
4582
4583 #[test]
4584 fn graph_facts_with_properties_and_confidence_metadata_round_trip() {
4585 let store = make_store();
4586 let file = make_file("f1", "src/lib.rs");
4587 let manifest = make_manifest();
4588 let files = vec![file];
4589 let data = IndexData {
4590 manifest: &manifest,
4591 files: &files,
4592 symbols: &[],
4593 occurrences: &[],
4594 chunks: &[],
4595 imports: &[],
4596 tests: &[],
4597 analysis_facts: &[],
4598 };
4599 store.replace_index(data).unwrap();
4600
4601 let node_a = GraphNode {
4602 id: NodeId::new("file:src/lib.rs"),
4603 node_type: GraphNodeType::File,
4604 label: "src/lib.rs".into(),
4605 file_id: Some(FileId::new("f1")),
4606 properties: BTreeMap::from([("package".into(), serde_json::json!("open-kioku"))]),
4607 schema_version: Some("graph-v1".into()),
4608 source_pass: Some("tree_sitter".into()),
4609 index_mode: Some("full".into()),
4610 extractor_version: Some("test-extractor".into()),
4611 ambiguity: vec!["generated file status unknown".into()],
4612 quality_notes: vec!["file path verified".into()],
4613 ..Default::default()
4614 };
4615 let node_b = GraphNode {
4616 id: NodeId::new("symbol:s1"),
4617 node_type: GraphNodeType::Function,
4618 label: "worker".into(),
4619 file_id: Some(FileId::new("f1")),
4620 symbol_id: Some(SymbolId::new("s1")),
4621 ..Default::default()
4622 };
4623 let mut edge_evidence = evidence();
4624 edge_evidence.confidence_score = Some(0.98);
4625 edge_evidence.confidence_reason = Some("exact symbol occurrence".into());
4626 edge_evidence.freshness = Some("fresh".into());
4627 let edge = GraphEdge {
4628 id: EdgeId::new("e1"),
4629 from: node_a.id.clone(),
4630 to: node_b.id.clone(),
4631 edge_type: GraphEdgeType::Defines,
4632 evidence: edge_evidence,
4633 properties: BTreeMap::from([("relation".into(), serde_json::json!("definition"))]),
4634 schema_version: Some("graph-v1".into()),
4635 source_pass: Some("scip".into()),
4636 index_mode: Some("full".into()),
4637 extractor_version: Some("test-scip".into()),
4638 ambiguity: vec!["macro expansion not modeled".into()],
4639 quality_notes: vec!["exact definition edge".into()],
4640 };
4641
4642 store
4643 .replace_graph(
4644 &[node_a.clone(), node_b.clone()],
4645 std::slice::from_ref(&edge),
4646 )
4647 .unwrap();
4648
4649 let (nodes, edges) = store.neighbors("file:src/lib.rs", 10).unwrap();
4650 let stored_node = nodes.iter().find(|node| node.id == node_a.id).unwrap();
4651 assert_eq!(stored_node.properties, node_a.properties);
4652 assert_eq!(stored_node.schema_version.as_deref(), Some("graph-v1"));
4653 assert_eq!(stored_node.source_pass.as_deref(), Some("tree_sitter"));
4654 assert_eq!(stored_node.quality_notes, vec!["file path verified"]);
4655
4656 assert_eq!(edges.len(), 1);
4657 let stored_edge = &edges[0];
4658 assert_eq!(stored_edge.properties, edge.properties);
4659 assert_eq!(stored_edge.schema_version.as_deref(), Some("graph-v1"));
4660 assert_eq!(stored_edge.evidence.confidence_score, Some(0.98));
4661 assert_eq!(
4662 stored_edge.evidence.confidence_reason.as_deref(),
4663 Some("exact symbol occurrence")
4664 );
4665 assert_eq!(stored_edge.evidence.freshness.as_deref(), Some("fresh"));
4666
4667 let indexed_confidence: String = store
4668 .connection
4669 .lock()
4670 .unwrap()
4671 .query_row(
4672 "SELECT confidence FROM graph_edges WHERE id = 'e1'",
4673 [],
4674 |row| row.get(0),
4675 )
4676 .unwrap();
4677 assert_eq!(indexed_confidence, "Medium");
4678 }
4679
4680 #[test]
4681 fn shortest_path_finds_direct_route() {
4682 let store = make_store();
4683 let file = make_file("f1", "src/lib.rs");
4684 let manifest = make_manifest();
4685 let files = vec![file];
4686 let data = IndexData {
4687 manifest: &manifest,
4688 files: &files,
4689 symbols: &[],
4690 occurrences: &[],
4691 chunks: &[],
4692 imports: &[],
4693 tests: &[],
4694 analysis_facts: &[],
4695 };
4696 store.replace_index(data).unwrap();
4697
4698 let node_a = GraphNode {
4699 id: NodeId::new("a"),
4700 node_type: GraphNodeType::File,
4701 label: "a".into(),
4702 file_id: None,
4703 symbol_id: None,
4704 ..Default::default()
4705 };
4706 let node_b = GraphNode {
4707 id: NodeId::new("b"),
4708 node_type: GraphNodeType::File,
4709 label: "b".into(),
4710 file_id: None,
4711 symbol_id: None,
4712 ..Default::default()
4713 };
4714 let edge = GraphEdge {
4715 id: EdgeId::new("a-b"),
4716 from: node_a.id.clone(),
4717 to: node_b.id.clone(),
4718 edge_type: GraphEdgeType::Defines,
4719 evidence: evidence(),
4720 ..Default::default()
4721 };
4722 store.replace_graph(&[node_a, node_b], &[edge]).unwrap();
4723
4724 let path = store.shortest_path("a", "b", 5).unwrap();
4725 assert_eq!(path.len(), 1);
4726 assert_eq!(path[0].id.0, "a-b");
4727 }
4728
4729 #[test]
4730 fn shortest_path_returns_empty_when_no_route() {
4731 let store = make_store();
4732 let file = make_file("f1", "src/lib.rs");
4733 let manifest = make_manifest();
4734 let files = vec![file];
4735 let data = IndexData {
4736 manifest: &manifest,
4737 files: &files,
4738 symbols: &[],
4739 occurrences: &[],
4740 chunks: &[],
4741 imports: &[],
4742 tests: &[],
4743 analysis_facts: &[],
4744 };
4745 store.replace_index(data).unwrap();
4746 store.replace_graph(&[], &[]).unwrap();
4747
4748 let path = store.shortest_path("x", "y", 5).unwrap();
4749 assert!(path.is_empty());
4750 }
4751
    /// Simulates a database created with an older graph schema and checks that
    /// `initialize` upgrades it.
    ///
    /// Steps:
    /// 1. Drop the current `graph_nodes`/`graph_edges` tables and recreate them with
    ///    only the legacy columns (`id, label, json` and
    ///    `id, from_id, to_id, edge_type, json`).
    /// 2. Insert two nodes and one edge as JSON rows; the edge's `edge_type` column is
    ///    left as an empty string.
    /// 3. Call `initialize` twice.
    /// 4. Verify the typed queries (`nodes_by_type`, `edges_by_type`,
    ///    `graph_edges_between`, `graph_schema_counts`) see the legacy rows.
    /// 5. Call `replace_graph` with a single new node and verify the `node_type`
    ///    column, the `user_version` pragma and the seven expected indexes.
    #[test]
    fn test_old_graph_tables_migrate_and_replace_graph_backfills_columns() {
        let store = make_store();
        let legacy_file = GraphNode {
            id: NodeId::new("legacy_file"),
            node_type: GraphNodeType::File,
            label: "legacy.rs".into(),
            file_id: Some(FileId::new("f1")),
            ..Default::default()
        };
        let legacy_symbol = GraphNode {
            id: NodeId::new("legacy_symbol"),
            node_type: GraphNodeType::Function,
            label: "legacy_fn".into(),
            symbol_id: Some(SymbolId::new("s1")),
            ..Default::default()
        };
        let mut legacy_evidence = evidence();
        legacy_evidence.source_type = EvidenceSourceType::Scip;
        legacy_evidence.source = "index.scip".into();
        let legacy_edge = GraphEdge {
            id: EdgeId::new("legacy_edge"),
            from: legacy_file.id.clone(),
            to: legacy_symbol.id.clone(),
            edge_type: GraphEdgeType::Defines,
            evidence: legacy_evidence,
            ..Default::default()
        };
        // Scope the lock guard so it is released before `initialize` is called.
        {
            let conn = store.connection.lock().unwrap();
            // Replace the freshly created tables with the reduced legacy layout.
            conn.execute("DROP TABLE graph_nodes", []).unwrap();
            conn.execute("DROP TABLE graph_edges", []).unwrap();
            conn.execute(
                "CREATE TABLE graph_nodes(id TEXT PRIMARY KEY, label TEXT, json TEXT)",
                [],
            )
            .unwrap();
            conn.execute("CREATE TABLE graph_edges(id TEXT PRIMARY KEY, from_id TEXT, to_id TEXT, edge_type TEXT, json TEXT)", []).unwrap();
            // Legacy rows keep the full object only in the `json` column.
            conn.execute(
                "INSERT INTO graph_nodes(id, label, json) VALUES(?1, ?2, ?3)",
                params![
                    legacy_file.id.0.as_str(),
                    legacy_file.label.as_str(),
                    serde_json::to_string(&legacy_file).unwrap(),
                ],
            )
            .unwrap();
            conn.execute(
                "INSERT INTO graph_nodes(id, label, json) VALUES(?1, ?2, ?3)",
                params![
                    legacy_symbol.id.0.as_str(),
                    legacy_symbol.label.as_str(),
                    serde_json::to_string(&legacy_symbol).unwrap(),
                ],
            )
            .unwrap();
            // `edge_type` is deliberately written as '' even though the JSON says `Defines`.
            conn.execute(
                "INSERT INTO graph_edges(id, from_id, to_id, edge_type, json)
                 VALUES(?1, ?2, ?3, '', ?4)",
                params![
                    legacy_edge.id.0.as_str(),
                    legacy_edge.from.0.as_str(),
                    legacy_edge.to.0.as_str(),
                    serde_json::to_string(&legacy_edge).unwrap(),
                ],
            )
            .unwrap();
        }
        // Called twice; presumably to show that re-running the migration is harmless.
        store.initialize().unwrap();
        store.initialize().unwrap();

        // The legacy table had no `node_type` column, yet the typed lookup finds the row.
        let migrated_nodes = store.nodes_by_type(GraphNodeType::File, 10, 0).unwrap();
        assert_eq!(migrated_nodes.len(), 1);
        assert_eq!(migrated_nodes[0].id.0, "legacy_file");

        // The edge is found under `Defines` although its column value was inserted as ''.
        let migrated_edges = store.edges_by_type(GraphEdgeType::Defines, 10, 0).unwrap();
        assert_eq!(migrated_edges.len(), 1);
        assert_eq!(migrated_edges[0].id.0, "legacy_edge");
        let migrated_between = store
            .graph_edges_between("legacy_file", "legacy_symbol", 10)
            .unwrap();
        assert_eq!(migrated_between.len(), 1);

        let migrated_counts = store.graph_schema_counts().unwrap();
        assert_eq!(migrated_counts.node_types.get("File"), Some(&1));
        assert_eq!(migrated_counts.edge_types.get("Defines"), Some(&1));

        // Replace the whole graph with one new File node.
        let node = GraphNode {
            id: NodeId::new("test_node"),
            node_type: GraphNodeType::File,
            label: "test".into(),
            ..Default::default()
        };
        store.replace_graph(&[node], &[]).unwrap();

        // Exactly one File row remains, i.e. the legacy file node was replaced and the
        // new row has its `node_type` column populated.
        let count: i64 = store
            .connection
            .lock()
            .unwrap()
            .query_row(
                "SELECT COUNT(*) FROM graph_nodes WHERE node_type = 'File'",
                [],
                |r| r.get(0),
            )
            .unwrap();
        assert_eq!(count, 1);

        // The schema version is recorded in SQLite's `user_version` pragma.
        let version: i64 = store
            .connection
            .lock()
            .unwrap()
            .pragma_query_value(None, "user_version", |row| row.get(0))
            .unwrap();
        assert_eq!(version, SQLITE_GRAPH_SCHEMA_VERSION);

        // All seven graph indexes named below must exist after migration.
        let index_count: i64 = store
            .connection
            .lock()
            .unwrap()
            .query_row(
                "SELECT COUNT(*) FROM sqlite_master
                 WHERE type = 'index'
                 AND name IN (
                 'idx_graph_nodes_type',
                 'idx_graph_nodes_file',
                 'idx_graph_nodes_symbol',
                 'idx_graph_edges_type',
                 'idx_graph_edges_from_type',
                 'idx_graph_edges_to_type',
                 'idx_graph_edges_source_type'
                 )",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(index_count, 7);
    }
4889
4890 #[test]
4891 fn test_nodes_by_type_uses_indexed_column() {
4892 let store = make_store();
4893 let node1 = GraphNode {
4894 id: NodeId::new("n1"),
4895 node_type: GraphNodeType::File,
4896 ..Default::default()
4897 };
4898 let node2 = GraphNode {
4899 id: NodeId::new("n2"),
4900 node_type: GraphNodeType::File,
4901 ..Default::default()
4902 };
4903 let node3 = GraphNode {
4904 id: NodeId::new("n3"),
4905 node_type: GraphNodeType::Function,
4906 ..Default::default()
4907 };
4908 store
4909 .replace_graph(&[node2.clone(), node3.clone(), node1.clone()], &[])
4910 .unwrap();
4911
4912 let nodes = store.nodes_by_type(GraphNodeType::File, 10, 0).unwrap();
4913 assert_eq!(nodes.len(), 2);
4914 assert_eq!(nodes[0].id.0, "n1");
4915 assert_eq!(nodes[1].id.0, "n2");
4916 }
4917
4918 #[test]
4919 fn test_edges_by_type_uses_indexed_column() {
4920 let store = make_store();
4921 let node1 = GraphNode {
4922 id: NodeId::new("n1"),
4923 ..Default::default()
4924 };
4925 let node2 = GraphNode {
4926 id: NodeId::new("n2"),
4927 ..Default::default()
4928 };
4929 let edge1 = GraphEdge {
4930 id: EdgeId::new("e1"),
4931 from: NodeId::new("n1"),
4932 to: NodeId::new("n2"),
4933 edge_type: GraphEdgeType::Calls,
4934 ..Default::default()
4935 };
4936 let edge2 = GraphEdge {
4937 id: EdgeId::new("e2"),
4938 from: NodeId::new("n1"),
4939 to: NodeId::new("n2"),
4940 edge_type: GraphEdgeType::Calls,
4941 ..Default::default()
4942 };
4943 let edge3 = GraphEdge {
4944 id: EdgeId::new("e3"),
4945 from: NodeId::new("n1"),
4946 to: NodeId::new("n2"),
4947 edge_type: GraphEdgeType::Defines,
4948 ..Default::default()
4949 };
4950 store
4951 .replace_graph(
4952 &[node1, node2],
4953 &[edge2.clone(), edge3.clone(), edge1.clone()],
4954 )
4955 .unwrap();
4956
4957 let edges = store.edges_by_type(GraphEdgeType::Calls, 10, 0).unwrap();
4958 assert_eq!(edges.len(), 2);
4959 assert_eq!(edges[0].id.0, "e1");
4960 assert_eq!(edges[1].id.0, "e2");
4961 }
4962
4963 #[test]
4964 fn test_graph_edges_between_respects_limit() {
4965 let store = make_store();
4966 let node1 = GraphNode {
4967 id: NodeId::new("n1"),
4968 ..Default::default()
4969 };
4970 let node2 = GraphNode {
4971 id: NodeId::new("n2"),
4972 ..Default::default()
4973 };
4974 let edge1 = GraphEdge {
4975 id: EdgeId::new("e1"),
4976 from: NodeId::new("n1"),
4977 to: NodeId::new("n2"),
4978 ..Default::default()
4979 };
4980 let edge2 = GraphEdge {
4981 id: EdgeId::new("e2"),
4982 from: NodeId::new("n1"),
4983 to: NodeId::new("n2"),
4984 ..Default::default()
4985 };
4986 store
4987 .replace_graph(&[node1, node2], &[edge2.clone(), edge1.clone()])
4988 .unwrap();
4989
4990 let edges = store.graph_edges_between("n1", "n2", 1).unwrap();
4991 assert_eq!(edges.len(), 1);
4992 assert_eq!(edges[0].id.0, "e1");
4993 }
4994
4995 #[test]
4996 fn test_query_limit_is_capped() {
4997 assert_eq!(super::clamp_limit(0), 100);
4998 assert_eq!(super::clamp_limit(5), 5);
4999 assert_eq!(super::clamp_limit(5000), 1000);
5000 }
5001
5002 #[test]
5003 fn test_graph_schema_counts_returns_sorted_type_counts() {
5004 let store = make_store();
5005 let node1 = GraphNode {
5006 id: NodeId::new("n1"),
5007 node_type: GraphNodeType::File,
5008 ..Default::default()
5009 };
5010 let node2 = GraphNode {
5011 id: NodeId::new("n2"),
5012 node_type: GraphNodeType::File,
5013 ..Default::default()
5014 };
5015 let node3 = GraphNode {
5016 id: NodeId::new("n3"),
5017 node_type: GraphNodeType::Function,
5018 ..Default::default()
5019 };
5020 let edge1 = GraphEdge {
5021 id: EdgeId::new("e1"),
5022 from: NodeId::new("n1"),
5023 to: NodeId::new("n2"),
5024 edge_type: GraphEdgeType::Calls,
5025 ..Default::default()
5026 };
5027 store
5028 .replace_graph(&[node1, node2, node3], &[edge1])
5029 .unwrap();
5030
5031 let counts = store.graph_schema_counts().unwrap();
5032 assert_eq!(counts.node_types.get("File"), Some(&2));
5033 assert_eq!(counts.node_types.get("Function"), Some(&1));
5034 assert_eq!(counts.edge_types.get("Calls"), Some(&1));
5035 }
5036
5037 #[test]
5038 fn test_graph_counts_returns_total_nodes_and_edges() {
5039 let store = make_store();
5040 let node1 = GraphNode {
5041 id: NodeId::new("n1"),
5042 node_type: GraphNodeType::File,
5043 ..Default::default()
5044 };
5045 let node2 = GraphNode {
5046 id: NodeId::new("n2"),
5047 node_type: GraphNodeType::File,
5048 ..Default::default()
5049 };
5050 let edge1 = GraphEdge {
5051 id: EdgeId::new("e1"),
5052 from: NodeId::new("n1"),
5053 to: NodeId::new("n2"),
5054 edge_type: GraphEdgeType::Calls,
5055 ..Default::default()
5056 };
5057 store.replace_graph(&[node1, node2], &[edge1]).unwrap();
5058
5059 let overall = store.graph_counts().unwrap();
5060 assert_eq!(overall.nodes, 2);
5061 assert_eq!(overall.edges, 1);
5062 }
5063}